Files changed (1)
  1. app.py +75 -72
app.py CHANGED
@@ -1,73 +1,76 @@
- import gradio as gr
- from peft import PeftModel
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
- import torch
- import os
- os.environ["CUDA_VISIBLE_DEVICES"] = "" # Disable CUDA for Gradio
-
- # Model and tokenizer loading
- model_name = "microsoft/phi-2" # Replace with your base model name
- adapter_path = "./checkpoint-500" # Path to your adapter directory (relative to app.py)
-
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch.float16,
- )
-
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- tokenizer.pad_token = tokenizer.eos_token
-
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     device_map="auto",
-     quantization_config=bnb_config,
-     trust_remote_code=True,
-     torch_dtype=torch.float16, # Use float16 for faster inference
- )
- model = PeftModel.from_pretrained(model, adapter_path)
- model.eval()
-
- # Inference function
- def generate_text(prompt, max_length=200, temperature=0.7, top_p=0.9):
-     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-     attention_mask = tokenizer(prompt, return_tensors="pt").attention_mask.to(model.device)
-
-     with torch.no_grad():
-         outputs = model.generate(
-             input_ids=input_ids,
-             attention_mask=attention_mask,
-             max_length=max_length,
-             temperature=temperature,
-             top_p=top_p,
-             do_sample=True,
-             pad_token_id=tokenizer.pad_token_id,
-         )
-
-     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return generated_text
-
- # Sample questions
- sample_questions = [
-     "Write a short story about a dog who becomes a detective.",
-     "What is 2+2?",
-     "Write a Flask App in python to say 'Hello World!'",
-     "Give me a short 200-word essay on 'monospony'.",
- ]
-
- # Gradio interface
- iface = gr.Interface(
-     fn=generate_text,
-     inputs=[
-         gr.Textbox(lines=5, label="Prompt"),
-         gr.Slider(minimum=50, maximum=500, value=250, label="Max Length"),
-         gr.Slider(minimum=0.1, maximum=1.0, value=0.1, label="Temperature"),
-         gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top P"),
-     ],
-     outputs=gr.Textbox(label="Generated Text"),
-     title="Phi-2 OASST Fine-Tuning Demo",
-     description="Generate text using a fine-tuned Phi-2 model with PEFT adapters. Click a sample question below to get started!",
-     examples=[[q, 250, 0.1, 0.9] for q in sample_questions], # Add examples
- )
-
+ import gradio as gr
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ import torch
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = "" # Disable CUDA for Gradio
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
+ os.environ["HF_HOME"] = "/tmp/huggingface_home"
+
+
+ # Model and tokenizer loading
+ model_name = "microsoft/phi-2" # Replace with your base model name
+ adapter_path = "./checkpoint-500" # Path to your adapter directory (relative to app.py)
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     quantization_config=bnb_config,
+     trust_remote_code=True,
+     torch_dtype=torch.float16, # Use float16 for faster inference
+ )
+ model = PeftModel.from_pretrained(model, adapter_path)
+ model.eval()
+
+ # Inference function
+ def generate_text(prompt, max_length=200, temperature=0.7, top_p=0.9):
+     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+     attention_mask = tokenizer(prompt, return_tensors="pt").attention_mask.to(model.device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             max_length=max_length,
+             temperature=temperature,
+             top_p=top_p,
+             do_sample=True,
+             pad_token_id=tokenizer.pad_token_id,
+         )
+
+     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return generated_text
+
+ # Sample questions
+ sample_questions = [
+     "Write a short story about a dog who becomes a detective.",
+     "What is 2+2?",
+     "Write a Flask App in python to say 'Hello World!'",
+     "Give me a short 200-word essay on 'monospony'.",
+ ]
+
+ # Gradio interface
+ iface = gr.Interface(
+     fn=generate_text,
+     inputs=[
+         gr.Textbox(lines=5, label="Prompt"),
+         gr.Slider(minimum=50, maximum=500, value=250, label="Max Length"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top P"),
+     ],
+     outputs=gr.Textbox(label="Generated Text"),
+     title="Phi-2 OASST Fine-Tuning Demo",
+     description="Generate text using a fine-tuned Phi-2 model with PEFT adapters. Click a sample question below to get started!",
+     examples=[[q, 250, 0.1, 0.9] for q in sample_questions], # Add examples
+ )
+
  iface.launch()
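
The only substantive change above is the pair of cache variables on new lines 7-8. On hosts where the default Hugging Face cache under the home directory is not writable (a common situation for Hugging Face Spaces and similar sandboxed runtimes), model downloads fail unless the cache is redirected to a writable path, and the variables need to be set before transformers is first imported so the new location is picked up when the library initializes. A minimal standalone sketch of the same idea, assuming any writable directory (the /tmp paths below are only illustrative):

import os

# Redirect Hugging Face caches to a writable location *before* importing transformers.
os.environ["HF_HOME"] = "/tmp/huggingface_home"              # root for hub downloads, tokens, etc.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"  # older variable, still read by transformers 4.x

from transformers import AutoTokenizer

# Downloads now land under the /tmp paths instead of ~/.cache/huggingface.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)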