Upload folder using huggingface_hub
- AITextDetector.ipynb +0 -0
- AITextDetector/.gradio/certificate.pem +31 -0
- AITextDetector/AITextDetector/COLAB_DEPLOY.md +131 -0
- AITextDetector/AITextDetector/DEPLOY.md +153 -0
- AITextDetector/AITextDetector/README.md +35 -3
- AITextDetector/AITextDetector/deploy.sh +19 -0
- AITextDetector/README.md +3 -35
- README.md +29 -3
- ai_text_detector/models.py +176 -4
- gradio_app.py +10 -23
AITextDetector.ipynb
ADDED
The diff for this file is too large to render. See the raw diff.
AITextDetector/.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
AITextDetector/AITextDetector/COLAB_DEPLOY.md
ADDED
@@ -0,0 +1,131 @@
+# 🚀 Deploy to Hugging Face Spaces from Google Colab
+
+Step-by-step guide to deploy your AI Text Detector app permanently to Hugging Face Spaces, all from Google Colab!
+
+## Prerequisites
+
+1. **Hugging Face Account**: Create one at [huggingface.co/join](https://huggingface.co/join)
+2. **Access Token**: Get your token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
+   - Click "New token"
+   - Name it (e.g., "colab-deploy")
+   - Select "Write" permissions
+   - Copy the token (you'll need it!)
+
+## Step-by-Step Deployment
+
+### Step 1: Open Google Colab
+
+Go to [colab.research.google.com](https://colab.research.google.com/) and create a new notebook.
+
+### Step 2: Install Dependencies
+
+```python
+!pip install -q gradio huggingface_hub transformers torch pandas
+```
+
+### Step 3: Clone Your Repository
+
+```python
+!git clone https://github.com/ChauHPham/AITextDetector.git
+%cd AITextDetector
+```
+
+### Step 4: Login to Hugging Face
+
+```python
+from huggingface_hub import login
+
+# Paste your token when prompted
+login()
+```
+
+**When prompted**, paste your Hugging Face token and press Enter.
+
+### Step 5: Deploy!
+
+```python
+!gradio deploy
+```
+
+**Follow the interactive prompts:**
+
+1. **Enter your Hugging Face username** (e.g., `yourusername`)
+2. **Enter a Space name** (e.g., `ai-text-detector`)
+   - This will create: `https://huggingface.co/spaces/yourusername/ai-text-detector`
+3. **Wait for deployment** (~5-10 minutes)
+   - Gradio will upload your files
+   - Hugging Face will build and deploy your app
+
+### Step 6: Access Your App!
+
+Once deployment completes, you'll see:
+```
+✅ Your app is live at: https://huggingface.co/spaces/yourusername/ai-text-detector
+```
+
+**Your app is now permanently hosted for free!** 🎉
+
+---
+
+## Complete Colab Notebook Code
+
+Copy-paste this entire block into a Colab cell:
+
+```python
+# Install dependencies
+!pip install -q gradio huggingface_hub transformers torch pandas
+
+# Clone repository
+!git clone https://github.com/ChauHPham/AITextDetector.git
+%cd AITextDetector
+
+# Login to Hugging Face
+from huggingface_hub import login
+login()  # Paste your token here
+
+# Deploy!
+!gradio deploy
+```
+
+---
+
+## Troubleshooting
+
+### "Token not found" error
+- Make sure you copied the full token from Hugging Face
+- Tokens start with `hf_...`
+
+### "Space already exists" error
+- Choose a different Space name
+- Or delete the existing Space from [huggingface.co/spaces](https://huggingface.co/spaces)
+
+### Deployment takes too long
+- Normal deployment takes 5-10 minutes
+- Check the build logs in the Hugging Face Spaces dashboard
+
+### Want to update your app?
+- Just run `!gradio deploy` again from Colab
+- It will update the existing Space
+
+---
+
+## Benefits of Hugging Face Spaces
+
+✅ **Free permanent hosting**
+✅ **No expiration** (unlike Colab public links)
+✅ **Shareable URL** that works forever
+✅ **Automatic updates** when you push code
+✅ **GPU support** (free tier available)
+
+---
+
+## Next Steps
+
+After deployment:
+1. Share your Space URL with others
+2. Customize your Space's README.md
+3. Add a Space card to your GitHub README
+4. Update your app anytime by running `gradio deploy` again
+
+Enjoy your permanently hosted AI Text Detector! 🚀
+
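A note on Step 4 of the file above: `login()` prompts for the token interactively in every fresh runtime, but it also accepts the token directly so the prompt can be skipped. A minimal sketch, assuming the token was stored ahead of time under the hypothetical name `HF_TOKEN`, either as an environment variable or in Colab's Secrets panel:

```python
import os

from huggingface_hub import login

# Assumption: the token was saved beforehand under the name HF_TOKEN,
# either as an environment variable or in Colab's Secrets panel.
token = os.environ.get("HF_TOKEN")
if token is None:
    from google.colab import userdata  # only available inside Colab
    token = userdata.get("HF_TOKEN")

login(token=token)  # non-interactive: no prompt, no pasting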
AITextDetector/AITextDetector/DEPLOY.md
ADDED
@@ -0,0 +1,153 @@
+# 🚀 Deployment Guide
+
+## Google Colab (Recommended for Mac M2)
+
+**Perfect for Mac M2 users** - avoids PyTorch MPS mutex lock issues!
+
+### Quick Start
+
+1. Open [Google Colab](https://colab.research.google.com/)
+2. Create a new notebook
+3. Run:
+
+```python
+!pip install -q transformers torch pandas gradio kagglehub
+!git clone https://github.com/ChauHPham/AITextDetector.git
+%cd AITextDetector
+!git checkout main
+!python gradio_app.py
+```
+
+4. **Get your public link**: After running, you'll see:
+   ```
+   * Running on public URL: https://xxxxx.gradio.live
+   ```
+   This link is shareable and works as long as the Colab notebook is running!
+
+### Keep It Running
+
+- Enable "Keep runtime alive" in Colab's runtime settings
+- The public link expires after 1 week of inactivity
+- For permanent hosting, use Hugging Face Spaces (see below)
+
+---
+
+## Hugging Face Spaces (Permanent Hosting)
+
+Deploy your app permanently to Hugging Face Spaces for free!
+
+### Option 1: Deploy from Google Colab
+
+**Perfect for Mac M2 users** - deploy directly from Colab!
+
+```python
+# 1. Install dependencies
+!pip install -q gradio huggingface_hub
+
+# 2. Clone your repo (if not already done)
+!git clone https://github.com/ChauHPham/AITextDetector.git
+%cd AITextDetector
+
+# 3. Login to Hugging Face (you'll need a token)
+# Get your token from: https://huggingface.co/settings/tokens
+from huggingface_hub import login
+login()  # Paste your token when prompted
+
+# 4. Deploy!
+!gradio deploy
+```
+
+**Follow the prompts:**
+1. Enter your Hugging Face username
+2. Choose/create a Space name (e.g., `ai-text-detector`)
+3. Wait for deployment (~5-10 minutes)
+
+Your app will be live at: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
+
+### Option 2: Using Gradio CLI (Local)
+
+```bash
+# Install gradio if not already installed
+pip install gradio
+
+# Deploy from your project directory
+gradio deploy
+```
+
+Follow the prompts to:
+1. Login to Hugging Face (or create an account)
+2. Choose/create a Space
+3. Deploy!
+
+### Option 3: Manual Deployment
+
+1. Create a new Space on [Hugging Face Spaces](https://huggingface.co/spaces)
+2. Choose "Gradio" as the SDK
+3. Upload your files:
+   - `gradio_app.py`
+   - `ai_text_detector/` (entire package)
+   - `requirements.txt`
+   - `README.md`
+4. Add a `README.md` in the Space with:
+   ```yaml
+   ---
+   title: AI Text Detector
+   emoji: 🔍
+   colorFrom: blue
+   colorTo: purple
+   sdk: gradio
+   app_file: gradio_app.py
+   pinned: false
+   ---
+   ```
+5. The Space will automatically build and deploy!
+
+---
+
+## Local Deployment
+
+### Requirements
+
+- Python 3.8+
+- See `requirements.txt`
+
+### Run Locally
+
+```bash
+# Install dependencies
+pip install -r requirements.txt
+pip install -e .
+
+# Run Gradio app
+python gradio_app.py
+```
+
+**Note for Mac M2 users**: Local training may fail due to PyTorch MPS bugs. Use Google Colab for training instead.
+
+---
+
+## Docker Deployment
+
+```bash
+# Build
+docker build -t ai-text-detector .
+
+# Run
+docker run -p 7860:7860 ai-text-detector
+```
+
+---
+
+## Troubleshooting
+
+### Mac M2 Issues
+
+If you encounter `mutex.cc lock blocking` errors on Mac M2:
+- ✅ **Use Google Colab** (recommended)
+- ✅ Use Docker with a Linux base image
+- ❌ Local training may not work due to PyTorch MPS bugs
+
+### Model Loading Issues
+
+The app automatically uses the Desklib pre-trained model if no trained model is found. The model downloads automatically on first use (~1.7GB).
+
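As a programmatic alternative to the interactive `gradio deploy` flow described in the file above, the `huggingface_hub` client can create and upload a Space directly. A minimal sketch, assuming you are already logged in and using the hypothetical Space id `yourusername/ai-text-detector`:

```python
from huggingface_hub import HfApi

api = HfApi()
repo_id = "yourusername/ai-text-detector"  # hypothetical Space id

# Create the Space if it doesn't exist yet (Gradio SDK)
api.create_repo(repo_id=repo_id, repo_type="space", space_sdk="gradio", exist_ok=True)

# Upload the project folder; the Space then builds and serves the app
api.upload_folder(folder_path=".", repo_id=repo_id, repo_type="space")
print(f"https://huggingface.co/spaces/{repo_id}")
```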
AITextDetector/AITextDetector/README.md
CHANGED
@@ -1,8 +1,31 @@
-
+---
+title: AITextDetector
+app_file: gradio_app.py
+sdk: gradio
+sdk_version: 5.49.1
+---
+# AI Text Detector
 
-A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, and a
+A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, CLI, and a **Gradio web interface**.
 
-##
+## 🌐 Web Interface (Gradio)
+
+**Try it now on Google Colab** (works perfectly on Mac M2!):
+
+```python
+!pip install -q transformers torch pandas gradio kagglehub
+!git clone https://github.com/ChauHPham/AITextDetector.git
+%cd AITextDetector
+!python gradio_app.py
+```
+
+Get a **public shareable link** instantly! See [DEPLOY.md](DEPLOY.md) for deployment options.
+
+### 🍎 Mac M2 Users
+
+**Google Colab is recommended** - local training may fail due to PyTorch MPS mutex lock issues. The Gradio app works great in Colab with free GPU!
+
+## Quickstart (CLI)
 
 ```bash
 # 1) Create & activate a virtualenv (recommended)
@@ -46,3 +69,12 @@ See `configs/default.yaml`. Key fields:
 * Labels standardized to `0=human`, `1=ai`.
 * Mixed precision (fp16) auto-enables on CUDA.
 * Evaluate with accuracy, macro-F1, and confusion matrix.
+* **Mac M2 users**: Use Google Colab for training (see above) to avoid PyTorch MPS bugs.
+
+## Deployment
+
+See [DEPLOY.md](DEPLOY.md) for:
+- Google Colab setup (recommended for Mac M2)
+- Hugging Face Spaces deployment (`gradio deploy`)
+- Docker deployment
+- Troubleshooting guide
AITextDetector/AITextDetector/deploy.sh
ADDED
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Quick deployment script for Hugging Face Spaces
+
+echo "🚀 Deploying AI Text Detector to Hugging Face Spaces..."
+echo ""
+echo "Make sure you have:"
+echo "  1. Hugging Face account (https://huggingface.co/join)"
+echo "  2. Gradio installed (pip install gradio)"
+echo "  3. Hugging Face CLI installed (pip install huggingface_hub)"
+echo ""
+read -p "Press Enter to continue or Ctrl+C to cancel..."
+
+# Deploy using Gradio CLI
+gradio deploy
+
+echo ""
+echo "✅ Deployment complete!"
+echo "Your app will be available at: https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME"
+
AITextDetector/README.md
CHANGED
@@ -1,31 +1,8 @@
----
-title: AITextDetector
-app_file: gradio_app.py
-sdk: gradio
-sdk_version: 5.49.1
----
-# AI Text Detector
+# AI Text Detector (CLI)
 
-A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection,
+A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, and a CLI.
 
-##
-
-**Try it now on Google Colab** (works perfectly on Mac M2!):
-
-```python
-!pip install -q transformers torch pandas gradio kagglehub
-!git clone https://github.com/ChauHPham/AITextDetector.git
-%cd AITextDetector
-!python gradio_app.py
-```
-
-Get a **public shareable link** instantly! See [DEPLOY.md](DEPLOY.md) for deployment options.
-
-### 🍎 Mac M2 Users
-
-**Google Colab is recommended** - local training may fail due to PyTorch MPS mutex lock issues. The Gradio app works great in Colab with free GPU!
-
-## Quickstart (CLI)
+## Quickstart
 
 ```bash
 # 1) Create & activate a virtualenv (recommended)
@@ -69,12 +46,3 @@ See `configs/default.yaml`. Key fields:
 * Labels standardized to `0=human`, `1=ai`.
 * Mixed precision (fp16) auto-enables on CUDA.
 * Evaluate with accuracy, macro-F1, and confusion matrix.
-* **Mac M2 users**: Use Google Colab for training (see above) to avoid PyTorch MPS bugs.
-
-## Deployment
-
-See [DEPLOY.md](DEPLOY.md) for:
-- Google Colab setup (recommended for Mac M2)
-- Hugging Face Spaces deployment (`gradio deploy`)
-- Docker deployment
-- Troubleshooting guide
README.md
CHANGED
@@ -4,11 +4,28 @@ app_file: gradio_app.py
 sdk: gradio
 sdk_version: 5.49.1
 ---
-# AI Text Detector
+# AI Text Detector
 
-A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, and a
+A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, CLI, and a **Gradio web interface**.
 
-##
+## 🌐 Web Interface (Gradio)
+
+**Try it now on Google Colab** (works perfectly on Mac M2!):
+
+```python
+!pip install -q transformers torch pandas gradio kagglehub
+!git clone https://github.com/ChauHPham/AITextDetector.git
+%cd AITextDetector
+!python gradio_app.py
+```
+
+Get a **public shareable link** instantly! See [DEPLOY.md](DEPLOY.md) for deployment options.
+
+### 🍎 Mac M2 Users
+
+**Google Colab is recommended** - local training may fail due to PyTorch MPS mutex lock issues. The Gradio app works great in Colab with free GPU!
+
+## Quickstart (CLI)
 
 ```bash
 # 1) Create & activate a virtualenv (recommended)
@@ -52,3 +69,12 @@ See `configs/default.yaml`. Key fields:
 * Labels standardized to `0=human`, `1=ai`.
 * Mixed precision (fp16) auto-enables on CUDA.
 * Evaluate with accuracy, macro-F1, and confusion matrix.
+* **Mac M2 users**: Use Google Colab for training (see above) to avoid PyTorch MPS bugs.
+
+## Deployment
+
+See [DEPLOY.md](DEPLOY.md) for:
+- Google Colab setup (recommended for Mac M2)
+- Hugging Face Spaces deployment (`gradio deploy`)
+- Docker deployment
+- Troubleshooting guide
ai_text_detector/models.py
CHANGED
@@ -1,16 +1,170 @@
 import os
+import sys
 
 # Disable tokenizer parallelism and MPS on macOS
 if os.getenv("TOKENIZERS_PARALLELISM") is None:
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-
+import torch
+import torch.nn as nn
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig, AutoModel, PreTrainedModel
+
+class DesklibAIDetectionModel(PreTrainedModel):
+    """Desklib AI Detection Model - Pre-trained model for AI text detection"""
+    config_class = AutoConfig
+
+    def __init__(self, config):
+        super().__init__(config)
+        # Initialize the base transformer model
+        self.model = AutoModel.from_config(config)
+        # Define a classifier head
+        self.classifier = nn.Linear(config.hidden_size, 1)
+        # Initialize weights
+        self.init_weights()
+
+    def forward(self, input_ids, attention_mask=None, labels=None):
+        # Forward pass through the transformer
+        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+        last_hidden_state = outputs[0]
+
+        # Mean pooling
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
+        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, dim=1)
+        sum_mask = torch.clamp(input_mask_expanded.sum(dim=1), min=1e-9)
+        pooled_output = sum_embeddings / sum_mask
+
+        # Classifier
+        logits = self.classifier(pooled_output)
+
+        loss = None
+        if labels is not None:
+            loss_fct = nn.BCEWithLogitsLoss()
+            loss = loss_fct(logits.view(-1), labels.float())
+
+        output = {"logits": logits}
+        if loss is not None:
+            output["loss"] = loss
+        return output
 
 class DetectorModel:
-    def __init__(self, model_name="
+    def __init__(self, model_name="desklib/ai-text-detector-v1.01", use_desklib=True):
+        """
+        Initialize detector model.
+
+        Args:
+            model_name: Model name or path. Defaults to Desklib pre-trained model.
+            use_desklib: If True, use Desklib model architecture. If False, use standard classification.
+        """
         self.model_name = model_name
-        self.
-
+        self.use_desklib = use_desklib
+
+        if use_desklib and "desklib" in model_name:
+            # Try to load Desklib model, but fall back if MPS issues occur
+            if sys.platform == "darwin":
+                # On macOS: try multiple loading strategies
+                try:
+                    # Strategy 1: Load with low_cpu_mem_usage and explicit CPU
+                    print("Attempting to load Desklib model...")
+                    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                    config = AutoConfig.from_pretrained(model_name)
+
+                    # Try loading with safetensors if available
+                    try:
+                        from transformers import AutoModel
+                        # Load base model first
+                        base_model = AutoModel.from_pretrained(
+                            model_name,
+                            torch_dtype=torch.float32,
+                            low_cpu_mem_usage=True,
+                            device_map="cpu"
+                        )
+                        # Create Desklib model wrapper
+                        self.model = DesklibAIDetectionModel(config)
+                        self.model.model = base_model
+                        self.model = self.model.to("cpu")
+                        # Load classifier weights
+                        from transformers.utils import cached_file
+                        try:
+                            classifier_path = cached_file(model_name, "pytorch_model.bin")
+                            state_dict = torch.load(classifier_path, map_location="cpu")
+                            # Only load classifier weights
+                            classifier_dict = {k: v for k, v in state_dict.items() if "classifier" in k}
+                            if classifier_dict:
+                                self.model.load_state_dict(classifier_dict, strict=False)
+                        except Exception:
+                            pass  # Use initialized classifier
+                        self.model.eval()
+                        print("✅ Desklib model loaded successfully!")
+                    except Exception as e:
+                        print(f"⚠️ Desklib model loading failed: {e}")
+                        print("Falling back to DistilBERT model...")
+                        raise
+                except Exception:
+                    # Fallback to a smaller, simpler model
+                    print("Using DistilBERT as fallback (smaller, more compatible)")
+                    self.use_desklib = False
+                    self.model = AutoModelForSequenceClassification.from_pretrained(
+                        "distilbert-base-uncased",
+                        num_labels=2
+                    )
+                    self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+                    self.model = self.model.to("cpu")
+            else:
+                # Non-macOS: standard loading
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                config = AutoConfig.from_pretrained(model_name)
+                self.model = DesklibAIDetectionModel.from_pretrained(model_name)
+        else:
+            # Fallback to standard classification model
+            self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+            self.use_desklib = False
+
+    def predict(self, text, max_length=768, threshold=0.5):
+        """
+        Predict if text is AI-generated.
+
+        Args:
+            text: Input text to classify
+            max_length: Maximum sequence length
+            threshold: Probability threshold for classification
+
+        Returns:
+            tuple: (probability, label) where label is 1 for AI-generated, 0 for human
+        """
+        # Tokenize
+        encoded = self.tokenizer(
+            text,
+            padding='max_length',
+            truncation=True,
+            max_length=max_length,
+            return_tensors='pt'
+        )
+
+        input_ids = encoded['input_ids']
+        attention_mask = encoded['attention_mask']
+
+        # Get device
+        device = next(self.model.parameters()).device
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+
+        # Predict
+        self.model.eval()
+        with torch.no_grad():
+            if self.use_desklib:
+                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+                logits = outputs["logits"]
+                probability = torch.sigmoid(logits).item()
+            else:
+                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+                probs = torch.softmax(outputs.logits, dim=1)
+                # For standard models: prob[0] = human, prob[1] = AI
+                probability = probs[0][1].item()
+
+        label = 1 if probability >= threshold else 0
+
+        return probability, label
 
     def save(self, path: str):
         self.model.save_pretrained(path)
@@ -18,10 +172,28 @@ class DetectorModel:
 
     @classmethod
     def load(cls, path: str):
+        # Try to detect if it's a Desklib model
+        try:
+            config = AutoConfig.from_pretrained(path)
+            # Check if it has the Desklib architecture
+            if hasattr(config, 'model_type') and 'deberta' in config.model_type.lower():
+                model = DesklibAIDetectionModel.from_pretrained(path)
+                tokenizer = AutoTokenizer.from_pretrained(path)
+                obj = cls.__new__(cls)
+                obj.model_name = path
+                obj.model = model
+                obj.tokenizer = tokenizer
+                obj.use_desklib = True
+                return obj
+        except Exception:
+            pass
+
+        # Fallback to standard model
        model = AutoModelForSequenceClassification.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
        obj = cls.__new__(cls)
        obj.model_name = path
        obj.model = model
        obj.tokenizer = tokenizer
+        obj.use_desklib = False
        return obj
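For reference, the `predict` method added in this diff returns a `(probability, label)` tuple, which gradio_app.py consumes below. A minimal standalone usage sketch (the Desklib weights download on first use, ~1.7GB):

```python
from ai_text_detector.models import DetectorModel

# Default: the Desklib pre-trained detector (no training needed)
detector = DetectorModel("desklib/ai-text-detector-v1.01", use_desklib=True)

prob, label = detector.predict(
    "The quick brown fox jumps over the lazy dog.",
    max_length=768,
    threshold=0.5,
)
verdict = "AI-generated" if label == 1 else "human-written"
print(f"AI probability: {prob:.3f} -> {verdict}")
```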
gradio_app.py
CHANGED
@@ -46,13 +46,13 @@ def load_model():
             tokenizer = model.tokenizer
         except Exception as e:
             print(f"Failed to load model: {e}")
-            print("Using
-            model = DetectorModel("
+            print("Using Desklib pre-trained model instead.")
+            model = DetectorModel("desklib/ai-text-detector-v1.01", use_desklib=True)
             tokenizer = model.tokenizer
     else:
-        print("No trained model found. Using
-        # Use
-        model = DetectorModel("
+        print("No trained model found. Using Desklib pre-trained AI detector model.")
+        # Use Desklib pre-trained model (no training needed!)
+        model = DetectorModel("desklib/ai-text-detector-v1.01", use_desklib=True)
         tokenizer = model.tokenizer
 
 # Load model lazily (on first use) to avoid startup issues
@@ -76,29 +76,16 @@ def detect_text(text):
         return "Please enter some text to analyze."
 
     try:
-        #
-
-            text,
-            truncation=True,
-            padding="max_length",
-            max_length=256,
-            return_tensors="pt"
-        )
-
-        # Get prediction
-        with torch.no_grad():
-            outputs = model.model(**inputs)
-            probabilities = torch.softmax(outputs.logits, dim=1)
-            human_prob = probabilities[0][0].item()
-            ai_prob = probabilities[0][1].item()
+        # Use the model's predict method
+        ai_prob, predicted_label = model.predict(text, max_length=768, threshold=0.5)
 
         # Determine prediction
-        if
+        if predicted_label == 1:
             label = "🤖 AI-generated"
             confidence = ai_prob
         else:
             label = "🧑 Human-written"
-            confidence =
+            confidence = 1 - ai_prob  # Human probability is 1 - AI probability
 
         return f"{label} (confidence: {confidence:.1%})"
 
@@ -161,4 +148,4 @@ with gr.Blocks(title="AI Text Detector", theme=gr.themes.Soft()) as app:
     )
 
 if __name__ == "__main__":
-    app.launch(share=
+    app.launch(share=True, server_name="0.0.0.0", server_port=7860)
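One caveat on the final hunk: `share=True` is what produces the `*.gradio.live` link in Colab, but on Hugging Face Spaces the app is already served publicly, so the tunnel is redundant there. A possible refinement, sketched under the assumption that the `SPACE_ID` environment variable reliably marks a Spaces runtime:

```python
import os

# On Hugging Face Spaces, SPACE_ID is set and the app is already public,
# so skip the gradio.live share tunnel; elsewhere (e.g. Colab) keep it.
on_spaces = os.environ.get("SPACE_ID") is not None
app.launch(share=not on_spaces, server_name="0.0.0.0", server_port=7860)
```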