PromptEnhancer_32B-FlashPack

Sleeping

App Files Files Community

rahul7star committed on Oct 27

Commit

d071e42

verified ·

1 Parent(s): 248fe25

Update app_flash.py

Browse files

Files changed (1) hide show

app_flash.py +15 -14

app_flash.py CHANGED Viewed

@@ -3,47 +3,48 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from flashpack.integrations.transformers import FlashPackTransformersModelMixin
 # ============================================================
-# 1️⃣ Define FlashPack-enabled model class
 # ============================================================
 class FlashPackGemmaModel(AutoModelForCausalLM, FlashPackTransformersModelMixin):
     """AutoModelForCausalLM extended with FlashPackMixin for fast save/load"""
     pass
 # ============================================================
-# 2️⃣ Load or prepare model
 # ============================================================
-MODEL_ID = "gokaygokay/prompt-enhancer-gemma-3-270m-it"
 try:
-    print("📂 Trying to load FlashPack model...")
     model = FlashPackGemmaModel.from_pretrained_flashpack("model_flashpack")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 except Exception as e:
-    print("⚙️ FlashPack not found, loading from Hugging Face Hub...")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
-    # Save as FlashPack for faster next load
     model.save_pretrained_flashpack("model_flashpack")
     print("✅ Model saved as FlashPack for next startup!")
-# Create the Hugging Face text-generation pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
 # ============================================================
-# 3️⃣ Define inference logic
 # ============================================================
 def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
     chat_history = chat_history or []
-    # Build messages for chat-template
     messages = [
         {"role": "system", "content": "Enhance and expand the following prompt with more details and context:"},
         {"role": "user", "content": user_prompt},
     ]
-    # Use tokenizer.apply_chat_template
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     outputs = pipe(
@@ -63,7 +64,7 @@ def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
 # ============================================================
-# 4️⃣ Gradio Interface
 # ============================================================
 with gr.Blocks(title="Prompt Enhancer – Gemma 3 270M", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
@@ -103,7 +104,7 @@ with gr.Blocks(title="Prompt Enhancer – Gemma 3 270M", theme=gr.themes.Soft())
 # ============================================================
-# 5️⃣ Launch App
 # ============================================================
 if __name__ == "__main__":
     demo.launch(show_error=True)

 from flashpack.integrations.transformers import FlashPackTransformersModelMixin
 # ============================================================
+# 1️⃣ FlashPack-enabled model class
 # ============================================================
 class FlashPackGemmaModel(AutoModelForCausalLM, FlashPackTransformersModelMixin):
     """AutoModelForCausalLM extended with FlashPackMixin for fast save/load"""
     pass
+MODEL_ID = "gokaygokay/prompt-enhancer-gemma-3-270m-it"
 # ============================================================
+# 2️⃣ Load model and tokenizer with FlashPack
 # ============================================================
 try:
+    print("📂 Trying to load model from FlashPack directory...")
     model = FlashPackGemmaModel.from_pretrained_flashpack("model_flashpack")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 except Exception as e:
+    print("⚙️ FlashPack model not found, loading from Hugging Face Hub...")
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+    # Load Hugging Face model and wrap into FlashPack class
+    model = FlashPackGemmaModel.from_pretrained(MODEL_ID)
+    # Save for future faster loads
     model.save_pretrained_flashpack("model_flashpack")
     print("✅ Model saved as FlashPack for next startup!")
+# ============================================================
+# 3️⃣ Create text-generation pipeline
+# ============================================================
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
 # ============================================================
+# 4️⃣ Define prompt enhancement logic
 # ============================================================
 def enhance_prompt(user_prompt, temperature, max_tokens, chat_history):
     chat_history = chat_history or []
     messages = [
         {"role": "system", "content": "Enhance and expand the following prompt with more details and context:"},
         {"role": "user", "content": user_prompt},
     ]
+    # Use chat-template
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     outputs = pipe(
 # ============================================================
+# 5️⃣ Gradio Interface
 # ============================================================
 with gr.Blocks(title="Prompt Enhancer – Gemma 3 270M", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
 # ============================================================
+# 6️⃣ Launch App
 # ============================================================
 if __name__ == "__main__":
     demo.launch(show_error=True)