ChauHPham committed
Commit 84d4bbc · verified · 1 Parent(s): 072b539

Upload folder using huggingface_hub
AITextDetector.ipynb ADDED
The diff for this file is too large to render.
 
AITextDetector/.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
AITextDetector/AITextDetector/COLAB_DEPLOY.md ADDED
@@ -0,0 +1,131 @@
+ # 🚀 Deploy to Hugging Face Spaces from Google Colab
+
+ Step-by-step guide to deploy your AI Text Detector app permanently to Hugging Face Spaces, all from Google Colab!
+
+ ## Prerequisites
+
+ 1. **Hugging Face Account**: Create one at [huggingface.co/join](https://huggingface.co/join)
+ 2. **Access Token**: Get your token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
+    - Click "New token"
+    - Name it (e.g., "colab-deploy")
+    - Select "Write" permissions
+    - Copy the token (you'll need it!)
+
+ ## Step-by-Step Deployment
+
+ ### Step 1: Open Google Colab
+
+ Go to [colab.research.google.com](https://colab.research.google.com/) and create a new notebook.
+
+ ### Step 2: Install Dependencies
+
+ ```python
+ !pip install -q gradio huggingface_hub transformers torch pandas
+ ```
+
+ ### Step 3: Clone Your Repository
+
+ ```python
+ !git clone https://github.com/ChauHPham/AITextDetector.git
+ %cd AITextDetector
+ ```
+
+ ### Step 4: Login to Hugging Face
+
+ ```python
+ from huggingface_hub import login
+
+ # Paste your token when prompted
+ login()
+ ```
+
+ **When prompted**, paste your Hugging Face token and press Enter.
+
+ ### Step 5: Deploy!
+
+ ```python
+ !gradio deploy
+ ```
+
+ **Follow the interactive prompts:**
+
+ 1. **Enter your Hugging Face username** (e.g., `yourusername`)
+ 2. **Enter a Space name** (e.g., `ai-text-detector`)
+    - This will create: `https://huggingface.co/spaces/yourusername/ai-text-detector`
+ 3. **Wait for deployment** (~5-10 minutes)
+    - Gradio will upload your files
+    - Hugging Face will build and deploy your app
+
+ ### Step 6: Access Your App!
+
+ Once deployment completes, you'll see:
+ ```
+ ✅ Your app is live at: https://huggingface.co/spaces/yourusername/ai-text-detector
+ ```
+
+ **Your app is now permanently hosted for free!** 🎉
+
+ ---
+
+ ## Complete Colab Notebook Code
+
+ Copy-paste this entire block into a Colab cell:
+
+ ```python
+ # Install dependencies
+ !pip install -q gradio huggingface_hub transformers torch pandas
+
+ # Clone repository
+ !git clone https://github.com/ChauHPham/AITextDetector.git
+ %cd AITextDetector
+
+ # Login to Hugging Face
+ from huggingface_hub import login
+ login()  # Paste your token here
+
+ # Deploy!
+ !gradio deploy
+ ```
+
+ ---
+
+ ## Troubleshooting
+
+ ### "Token not found" error
+ - Make sure you copied the full token from Hugging Face
+ - Tokens start with `hf_...`
+
+ ### "Space already exists" error
+ - Choose a different Space name
+ - Or delete the existing Space from [huggingface.co/spaces](https://huggingface.co/spaces)
+
+ ### Deployment takes too long
+ - Normal deployment takes 5-10 minutes
+ - Check the build logs in Hugging Face Spaces dashboard
+
+ ### Want to update your app?
+ - Just run `!gradio deploy` again from Colab
+ - It will update the existing Space
+
+ ---
+
+ ## Benefits of Hugging Face Spaces
+
+ ✅ **Free permanent hosting**
+ ✅ **No expiration** (unlike Colab public links)
+ ✅ **Shareable URL** that works forever
+ ✅ **Automatic updates** when you push code
+ ✅ **GPU support** (free tier available)
+
+ ---
+
+ ## Next Steps
+
+ After deployment:
+ 1. Share your Space URL with others
+ 2. Customize your Space's README.md
+ 3. Add a Space card to your GitHub README
+ 4. Update your app anytime by running `gradio deploy` again
+
+ Enjoy your permanently hosted AI Text Detector! 🚀
+
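One note on Step 4: `login()` blocks waiting for an interactive paste, which is awkward in unattended Colab runs. `huggingface_hub.login` also accepts the token as an argument; a minimal sketch, assuming the token has been stored beforehand in an environment variable named `HF_TOKEN` (the variable name is an assumption, e.g. set via Colab's Secrets panel):

```python
import os

from huggingface_hub import login

# Non-interactive login: assumes HF_TOKEN was exported beforehand
# (e.g., via Colab's Secrets panel or an earlier cell).
login(token=os.environ["HF_TOKEN"])
```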
AITextDetector/AITextDetector/DEPLOY.md ADDED
@@ -0,0 +1,153 @@
+ # 🚀 Deployment Guide
+
+ ## Google Colab (Recommended for Mac M2)
+
+ **Perfect for Mac M2 users** - avoids PyTorch MPS mutex lock issues!
+
+ ### Quick Start
+
+ 1. Open [Google Colab](https://colab.research.google.com/)
+ 2. Create a new notebook
+ 3. Run:
+
+ ```python
+ !pip install -q transformers torch pandas gradio kagglehub
+ !git clone https://github.com/ChauHPham/AITextDetector.git
+ %cd AITextDetector
+ !git checkout main
+ !python gradio_app.py
+ ```
+
+ 4. **Get your public link**: After running, you'll see:
+    ```
+    * Running on public URL: https://xxxxx.gradio.live
+    ```
+    This link is shareable and works as long as the Colab notebook is running!
+
+ ### Keep It Running
+
+ - Enable "Keep runtime alive" in Colab's runtime settings
+ - The public link expires after 1 week of inactivity
+ - For permanent hosting, use Hugging Face Spaces (see below)
+
+ ---
+
+ ## Hugging Face Spaces (Permanent Hosting)
+
+ Deploy your app permanently to Hugging Face Spaces for free!
+
+ ### Option 1: Deploy from Google Colab
+
+ **Perfect for Mac M2 users** - deploy directly from Colab!
+
+ ```python
+ # 1. Install dependencies
+ !pip install -q gradio huggingface_hub
+
+ # 2. Clone your repo (if not already done)
+ !git clone https://github.com/ChauHPham/AITextDetector.git
+ %cd AITextDetector
+
+ # 3. Login to Hugging Face (you'll need a token)
+ # Get your token from: https://huggingface.co/settings/tokens
+ from huggingface_hub import login
+ login()  # Paste your token when prompted
+
+ # 4. Deploy!
+ !gradio deploy
+ ```
+
+ **Follow the prompts:**
+ 1. Enter your Hugging Face username
+ 2. Choose/create a Space name (e.g., `ai-text-detector`)
+ 3. Wait for deployment (~5-10 minutes)
+
+ Your app will be live at: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
+
+ ### Option 2: Using Gradio CLI (Local)
+
+ ```bash
+ # Install gradio if not already installed
+ pip install gradio
+
+ # Deploy from your project directory
+ gradio deploy
+ ```
+
+ Follow the prompts to:
+ 1. Login to Hugging Face (or create account)
+ 2. Choose/create a Space
+ 3. Deploy!
+
+ ### Option 3: Manual Deployment
+
+ 1. Create a new Space on [Hugging Face Spaces](https://huggingface.co/spaces)
+ 2. Choose "Gradio" as the SDK
+ 3. Upload your files:
+    - `gradio_app.py`
+    - `ai_text_detector/` (entire package)
+    - `requirements.txt`
+    - `README.md`
+ 4. Add a `README.md` in the Space with:
+    ```yaml
+    ---
+    title: AI Text Detector
+    emoji: 🔍
+    colorFrom: blue
+    colorTo: purple
+    sdk: gradio
+    app_file: gradio_app.py
+    pinned: false
+    ---
+    ```
+ 5. The Space will automatically build and deploy!
+
+ ---
+
+ ## Local Deployment
+
+ ### Requirements
+
+ - Python 3.8+
+ - See `requirements.txt`
+
+ ### Run Locally
+
+ ```bash
+ # Install dependencies
+ pip install -r requirements.txt
+ pip install -e .
+
+ # Run Gradio app
+ python gradio_app.py
+ ```
+
+ **Note for Mac M2 users**: Local training may fail due to PyTorch MPS bugs. Use Google Colab for training instead.
+
+ ---
+
+ ## Docker Deployment
+
+ ```bash
+ # Build
+ docker build -t ai-text-detector .
+
+ # Run
+ docker run -p 7860:7860 ai-text-detector
+ ```
+
+ ---
+
+ ## Troubleshooting
+
+ ### Mac M2 Issues
+
+ If you encounter `mutex.cc lock blocking` errors on Mac M2:
+ - ✅ **Use Google Colab** (recommended)
+ - ✅ Use Docker with Linux base image
+ - ❌ Local training may not work due to PyTorch MPS bugs
+
+ ### Model Loading Issues
+
+ The app automatically uses the Desklib pre-trained model if no trained model is found. The model downloads automatically on first use (~1.7GB).
+
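Besides `gradio deploy`, an existing Space can also be updated programmatically with `huggingface_hub`'s `HfApi.upload_folder`. A minimal sketch, assuming the Space `YOUR_USERNAME/YOUR_SPACE_NAME` from above already exists and you are logged in:

```python
from huggingface_hub import HfApi

# Push the current project directory to an existing Gradio Space.
api = HfApi()
api.upload_folder(
    folder_path=".",                          # project root containing gradio_app.py
    repo_id="YOUR_USERNAME/YOUR_SPACE_NAME",  # placeholder from the guide above
    repo_type="space",
)
```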
AITextDetector/AITextDetector/README.md CHANGED
@@ -1,8 +1,31 @@
- # AI Text Detector (CLI)
+ ---
+ title: AITextDetector
+ app_file: gradio_app.py
+ sdk: gradio
+ sdk_version: 5.49.1
+ ---
+ # AI Text Detector

- A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, and a CLI.
+ A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, CLI, and a **Gradio web interface**.

- ## Quickstart
+ ## 🌐 Web Interface (Gradio)
+
+ **Try it now on Google Colab** (works perfectly on Mac M2!):
+
+ ```python
+ !pip install -q transformers torch pandas gradio kagglehub
+ !git clone https://github.com/ChauHPham/AITextDetector.git
+ %cd AITextDetector
+ !python gradio_app.py
+ ```
+
+ Get a **public shareable link** instantly! See [DEPLOY.md](DEPLOY.md) for deployment options.
+
+ ### 🍎 Mac M2 Users
+
+ **Google Colab is recommended** - local training may fail due to PyTorch MPS mutex lock issues. The Gradio app works great in Colab with free GPU!
+
+ ## Quickstart (CLI)

  ```bash
  # 1) Create & activate a virtualenv (recommended)
@@ -46,3 +69,12 @@ See `configs/default.yaml`. Key fields:
  * Labels standardized to `0=human`, `1=ai`.
  * Mixed precision (fp16) auto-enables on CUDA.
  * Evaluate with accuracy, macro-F1, and confusion matrix.
+ * **Mac M2 users**: Use Google Colab for training (see above) to avoid PyTorch MPS bugs.
+
+ ## Deployment
+
+ See [DEPLOY.md](DEPLOY.md) for:
+ - Google Colab setup (recommended for Mac M2)
+ - Hugging Face Spaces deployment (`gradio deploy`)
+ - Docker deployment
+ - Troubleshooting guide
AITextDetector/AITextDetector/deploy.sh ADDED
@@ -0,0 +1,19 @@
+ #!/bin/bash
+ # Quick deployment script for Hugging Face Spaces
+
+ echo "🚀 Deploying AI Text Detector to Hugging Face Spaces..."
+ echo ""
+ echo "Make sure you have:"
+ echo " 1. Hugging Face account (https://huggingface.co/join)"
+ echo " 2. Gradio installed (pip install gradio)"
+ echo " 3. Hugging Face CLI installed (pip install huggingface_hub)"
+ echo ""
+ read -p "Press Enter to continue or Ctrl+C to cancel..."
+
+ # Deploy using Gradio CLI
+ gradio deploy
+
+ echo ""
+ echo "✅ Deployment complete!"
+ echo "Your app will be available at: https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME"
+
AITextDetector/README.md CHANGED
@@ -1,31 +1,8 @@
- ---
- title: AITextDetector
- app_file: gradio_app.py
- sdk: gradio
- sdk_version: 5.49.1
- ---
- # AI Text Detector
+ # AI Text Detector (CLI)

- A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, CLI, and a **Gradio web interface**.
+ A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, and a CLI.

- ## 🌐 Web Interface (Gradio)
-
- **Try it now on Google Colab** (works perfectly on Mac M2!):
-
- ```python
- !pip install -q transformers torch pandas gradio kagglehub
- !git clone https://github.com/ChauHPham/AITextDetector.git
- %cd AITextDetector
- !python gradio_app.py
- ```
-
- Get a **public shareable link** instantly! See [DEPLOY.md](DEPLOY.md) for deployment options.
-
- ### 🍎 Mac M2 Users
-
- **Google Colab is recommended** - local training may fail due to PyTorch MPS mutex lock issues. The Gradio app works great in Colab with free GPU!
-
- ## Quickstart (CLI)
+ ## Quickstart

  ```bash
  # 1) Create & activate a virtualenv (recommended)
@@ -69,12 +46,3 @@ See `configs/default.yaml`. Key fields:
  * Labels standardized to `0=human`, `1=ai`.
  * Mixed precision (fp16) auto-enables on CUDA.
  * Evaluate with accuracy, macro-F1, and confusion matrix.
- * **Mac M2 users**: Use Google Colab for training (see above) to avoid PyTorch MPS bugs.
-
- ## Deployment
-
- See [DEPLOY.md](DEPLOY.md) for:
- - Google Colab setup (recommended for Mac M2)
- - Hugging Face Spaces deployment (`gradio deploy`)
- - Docker deployment
- - Troubleshooting guide
README.md CHANGED
@@ -4,11 +4,28 @@ app_file: gradio_app.py
  sdk: gradio
  sdk_version: 5.49.1
  ---
- # AI Text Detector (CLI)
+ # AI Text Detector

- A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, and a CLI.
+ A learning project for detecting AI-generated vs. human-written text with a modular Python package, YAML configs, GPU auto-detection, CLI, and a **Gradio web interface**.

- ## Quickstart
+ ## 🌐 Web Interface (Gradio)
+
+ **Try it now on Google Colab** (works perfectly on Mac M2!):
+
+ ```python
+ !pip install -q transformers torch pandas gradio kagglehub
+ !git clone https://github.com/ChauHPham/AITextDetector.git
+ %cd AITextDetector
+ !python gradio_app.py
+ ```
+
+ Get a **public shareable link** instantly! See [DEPLOY.md](DEPLOY.md) for deployment options.
+
+ ### 🍎 Mac M2 Users
+
+ **Google Colab is recommended** - local training may fail due to PyTorch MPS mutex lock issues. The Gradio app works great in Colab with free GPU!
+
+ ## Quickstart (CLI)

  ```bash
  # 1) Create & activate a virtualenv (recommended)
@@ -52,3 +69,12 @@ See `configs/default.yaml`. Key fields:
  * Labels standardized to `0=human`, `1=ai`.
  * Mixed precision (fp16) auto-enables on CUDA.
  * Evaluate with accuracy, macro-F1, and confusion matrix.
+ * **Mac M2 users**: Use Google Colab for training (see above) to avoid PyTorch MPS bugs.
+
+ ## Deployment
+
+ See [DEPLOY.md](DEPLOY.md) for:
+ - Google Colab setup (recommended for Mac M2)
+ - Hugging Face Spaces deployment (`gradio deploy`)
+ - Docker deployment
+ - Troubleshooting guide
ai_text_detector/models.py CHANGED
@@ -1,16 +1,170 @@
  import os
+ import sys

  # Disable tokenizer parallelism and MPS on macOS
  if os.getenv("TOKENIZERS_PARALLELISM") is None:
      os.environ["TOKENIZERS_PARALLELISM"] = "false"

- from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ import torch
+ import torch.nn as nn
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig, AutoModel, PreTrainedModel
+
+ class DesklibAIDetectionModel(PreTrainedModel):
+     """Desklib AI Detection Model - Pre-trained model for AI text detection"""
+     config_class = AutoConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         # Initialize the base transformer model
+         self.model = AutoModel.from_config(config)
+         # Define a classifier head
+         self.classifier = nn.Linear(config.hidden_size, 1)
+         # Initialize weights
+         self.init_weights()
+
+     def forward(self, input_ids, attention_mask=None, labels=None):
+         # Forward pass through the transformer
+         outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+         last_hidden_state = outputs[0]
+
+         # Mean pooling
+         input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
+         sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, dim=1)
+         sum_mask = torch.clamp(input_mask_expanded.sum(dim=1), min=1e-9)
+         pooled_output = sum_embeddings / sum_mask
+
+         # Classifier
+         logits = self.classifier(pooled_output)
+
+         loss = None
+         if labels is not None:
+             loss_fct = nn.BCEWithLogitsLoss()
+             loss = loss_fct(logits.view(-1), labels.float())
+
+         output = {"logits": logits}
+         if loss is not None:
+             output["loss"] = loss
+         return output

  class DetectorModel:
-     def __init__(self, model_name="roberta-base", num_labels=2):
+     def __init__(self, model_name="desklib/ai-text-detector-v1.01", use_desklib=True):
+         """
+         Initialize detector model.
+
+         Args:
+             model_name: Model name or path. Defaults to Desklib pre-trained model.
+             use_desklib: If True, use Desklib model architecture. If False, use standard classification.
+         """
          self.model_name = model_name
-         self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
-         self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+         self.use_desklib = use_desklib
+
+         if use_desklib and "desklib" in model_name:
+             # Try to load Desklib model, but fallback if MPS issues occur
+             if sys.platform == "darwin":
+                 # On macOS: try multiple loading strategies
+                 try:
+                     # Strategy 1: Load with low_cpu_mem_usage and explicit CPU
+                     print("Attempting to load Desklib model...")
+                     self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                     config = AutoConfig.from_pretrained(model_name)
+
+                     # Try loading with safetensors if available
+                     try:
+                         from transformers import AutoModel
+                         # Load base model first
+                         base_model = AutoModel.from_pretrained(
+                             model_name,
+                             torch_dtype=torch.float32,
+                             low_cpu_mem_usage=True,
+                             device_map="cpu"
+                         )
+                         # Create Desklib model wrapper
+                         self.model = DesklibAIDetectionModel(config)
+                         self.model.model = base_model
+                         self.model = self.model.to("cpu")
+                         # Load classifier weights
+                         from transformers.utils import cached_file
+                         try:
+                             classifier_path = cached_file(model_name, "pytorch_model.bin")
+                             state_dict = torch.load(classifier_path, map_location="cpu")
+                             # Only load classifier weights
+                             classifier_dict = {k: v for k, v in state_dict.items() if "classifier" in k}
+                             if classifier_dict:
+                                 self.model.load_state_dict(classifier_dict, strict=False)
+                         except:
+                             pass  # Use initialized classifier
+                         self.model.eval()
+                         print("✅ Desklib model loaded successfully!")
+                     except Exception as e:
+                         print(f"⚠️ Desklib model loading failed: {e}")
+                         print("Falling back to DistilBERT model...")
+                         raise
+                 except:
+                     # Fallback to a smaller, simpler model
+                     print("Using DistilBERT as fallback (smaller, more compatible)")
+                     self.use_desklib = False
+                     self.model = AutoModelForSequenceClassification.from_pretrained(
+                         "distilbert-base-uncased",
+                         num_labels=2
+                     )
+                     self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+                     self.model = self.model.to("cpu")
+             else:
+                 # Non-macOS: standard loading
+                 self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+                 config = AutoConfig.from_pretrained(model_name)
+                 self.model = DesklibAIDetectionModel.from_pretrained(model_name)
+         else:
+             # Fallback to standard classification model
+             self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
+             self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+             self.use_desklib = False
+
+     def predict(self, text, max_length=768, threshold=0.5):
+         """
+         Predict if text is AI-generated.
+
+         Args:
+             text: Input text to classify
+             max_length: Maximum sequence length
+             threshold: Probability threshold for classification
+
+         Returns:
+             tuple: (probability, label) where label is 1 for AI-generated, 0 for human
+         """
+         # Tokenize
+         encoded = self.tokenizer(
+             text,
+             padding='max_length',
+             truncation=True,
+             max_length=max_length,
+             return_tensors='pt'
+         )
+
+         input_ids = encoded['input_ids']
+         attention_mask = encoded['attention_mask']
+
+         # Get device
+         device = next(self.model.parameters()).device
+         input_ids = input_ids.to(device)
+         attention_mask = attention_mask.to(device)
+
+         # Predict
+         self.model.eval()
+         with torch.no_grad():
+             if self.use_desklib:
+                 outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+                 logits = outputs["logits"]
+                 probability = torch.sigmoid(logits).item()
+             else:
+                 outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
+                 probs = torch.softmax(outputs.logits, dim=1)
+                 # For standard models: prob[0] = human, prob[1] = AI
+                 probability = probs[0][1].item()
+
+         label = 1 if probability >= threshold else 0
+
+         return probability, label

      def save(self, path: str):
          self.model.save_pretrained(path)
@@ -18,10 +172,28 @@ class DetectorModel:

      @classmethod
      def load(cls, path: str):
+         # Try to detect if it's a Desklib model
+         try:
+             config = AutoConfig.from_pretrained(path)
+             # Check if it has the Desklib architecture
+             if hasattr(config, 'model_type') and 'deberta' in config.model_type.lower():
+                 model = DesklibAIDetectionModel.from_pretrained(path)
+                 tokenizer = AutoTokenizer.from_pretrained(path)
+                 obj = cls.__new__(cls)
+                 obj.model_name = path
+                 obj.model = model
+                 obj.tokenizer = tokenizer
+                 obj.use_desklib = True
+                 return obj
+         except:
+             pass
+
+         # Fallback to standard model
          model = AutoModelForSequenceClassification.from_pretrained(path)
          tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
          obj = cls.__new__(cls)
          obj.model_name = path
          obj.model = model
          obj.tokenizer = tokenizer
+         obj.use_desklib = False
          return obj
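For reference, a minimal usage sketch of the new `DetectorModel.predict` API added above, assuming the package is importable as `ai_text_detector` (per the file path in this diff) and that the Desklib weights download on first use:

```python
from ai_text_detector.models import DetectorModel

# Loads desklib/ai-text-detector-v1.01 (downloads ~1.7GB on first use).
detector = DetectorModel("desklib/ai-text-detector-v1.01", use_desklib=True)

# Returns (P(ai), label), where label 1 = AI-generated and 0 = human.
prob, label = detector.predict("Paste a paragraph to classify here.", threshold=0.5)
print(f"P(ai) = {prob:.2%} -> {'AI-generated' if label == 1 else 'human-written'}")
```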
gradio_app.py CHANGED
@@ -46,13 +46,13 @@ def load_model():
          tokenizer = model.tokenizer
      except Exception as e:
          print(f"Failed to load model: {e}")
-         print("Using base RoBERTa model instead.")
-         model = DetectorModel("roberta-base")
+         print("Using Desklib pre-trained model instead.")
+         model = DetectorModel("desklib/ai-text-detector-v1.01", use_desklib=True)
          tokenizer = model.tokenizer
      else:
-         print("No trained model found. Using base RoBERTa model for demo.")
-         # Use a base model for demonstration
-         model = DetectorModel("roberta-base")
+         print("No trained model found. Using Desklib pre-trained AI detector model.")
+         # Use Desklib pre-trained model (no training needed!)
+         model = DetectorModel("desklib/ai-text-detector-v1.01", use_desklib=True)
          tokenizer = model.tokenizer

  # Load model lazily (on first use) to avoid startup issues
@@ -76,29 +76,16 @@ def detect_text(text):
          return "Please enter some text to analyze."

      try:
-         # Tokenize the input text
-         inputs = tokenizer(
-             text,
-             truncation=True,
-             padding="max_length",
-             max_length=256,
-             return_tensors="pt"
-         )
-
-         # Get prediction
-         with torch.no_grad():
-             outputs = model.model(**inputs)
-             probabilities = torch.softmax(outputs.logits, dim=1)
-             human_prob = probabilities[0][0].item()
-             ai_prob = probabilities[0][1].item()
+         # Use the model's predict method
+         ai_prob, predicted_label = model.predict(text, max_length=768, threshold=0.5)

          # Determine prediction
-         if ai_prob > human_prob:
+         if predicted_label == 1:
              label = "🤖 AI-generated"
              confidence = ai_prob
          else:
              label = "🧑 Human-written"
-             confidence = human_prob
+             confidence = 1 - ai_prob  # Human probability is 1 - AI probability

          return f"{label} (confidence: {confidence:.1%})"

@@ -161,4 +148,4 @@ with gr.Blocks(title="AI Text Detector", theme=gr.themes.Soft()) as app:
      )

  if __name__ == "__main__":
-     app.launch(share=False, server_name="0.0.0.0", server_port=7860)
+     app.launch(share=True, server_name="0.0.0.0", server_port=7860)
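Both branches of `predict` yield the same quantity, a probability that the text is AI-generated: the Desklib head emits a single logit, mapped through a sigmoid, while a standard two-label head emits two logits, mapped through a softmax where index 1 is the AI class. A small self-contained sketch with made-up logit values:

```python
import torch

# Single-logit head (Desklib): sigmoid maps the logit to P(ai).
p_ai_single = torch.sigmoid(torch.tensor([1.2])).item()  # ≈ 0.769

# Two-logit head (standard classifier): softmax over logits; column 1 is P(ai).
p_ai_two = torch.softmax(torch.tensor([[0.3, 1.2]]), dim=1)[0, 1].item()  # ≈ 0.711

print(p_ai_single, p_ai_two)
```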