Spaces:

Chris4K
/

1.58-bitnet

Runtime error

App Files Files Community

Chris4K committed on Jun 19

Commit

041c060

verified ·

1 Parent(s): 17f987a

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -38

app.py CHANGED Viewed

@@ -1,44 +1,73 @@
 import gradio as gr
-# === Simple echo bot logic ===
-def chat_response(history, message):
-    response = f"You said: {message}"
-    history = history + [(message, response)]
-    return history, ""
-# === HTML + JS to inject Quill ===
-quill_html = """
-<div id="editor-container" style="height: 150px; border: 1px solid #ccc;"></div>
-<button onclick="submitQuill()" style="margin-top:10px;">Send</button>
-<script src="https://cdn.quilljs.com/1.3.6/quill.js"></script>
-<link href="https://cdn.quilljs.com/1.3.6/quill.snow.css" rel="stylesheet">
-<script>
-  let quill = new Quill('#editor-container', {
-    theme: 'snow'
-  });
-  function submitQuill() {
-    const text = quill.root.innerHTML;
-    document.querySelector('textarea[data-testid="quill_input"]').value = text;
-    document.querySelector('button[data-testid="quill_hidden_submit"]').click();
-  }
-</script>
-"""
-# === Gradio Interface ===
-with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
-    with gr.Row():
-        gr.HTML(quill_html)
-    # Hidden components to pass data from JS
-    msg = gr.Textbox(visible=False, elem_id="quill_input")
-    submit_btn = gr.Button("Submit Hidden", visible=False, elem_id="quill_hidden_submit")
     state = gr.State([])
-    submit_btn.click(chat_response, inputs=[state, msg], outputs=[chatbot, msg], show_progress=False).then(
-        lambda h: h, inputs=chatbot, outputs=state
-    )
-demo.launch(share=True, debug=True)

+# llama158_chatbot.py
+# 🧪 INSTALLATION (run this separately in terminal before launching)
+# pip install torch --index-url https://download.pytorch.org/whl/cu121
+# pip install git+https://github.com/huggingface/transformers.git@refs/pull/33410/head
+# pip install gradio
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 import gradio as gr
+# 🧠 Load tokenizer and model once at import time; chat() below reads the module-level `tokenizer` and `model`.
+model_id = "HF1BitLLM/Llama3-8B-1.58-100B-tokens"  # repo the model weights are loaded from
+tokenizer_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # tokenizer comes from a different repo than the weights
+print("🔄 Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
+print("🧠 Loading 1.58-bit model...")
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",  # NOTE(review): presumably needs the `accelerate` package, which the install notes above do not list - confirm
+    torch_dtype=torch.bfloat16  # Ensure GPU supports BF16 (e.g. A100/4090)
+)
+# 🗣️ Chat function
+def chat(user_input, history, max_prompt_tokens=4000, max_new_tokens=100):
+    """Generate one assistant reply and return it with the extended history.
+
+    Args:
+        user_input: Text of the newest user message.
+        history: Sequence of ``(user, assistant)`` pairs from earlier turns.
+            ``None`` is treated as an empty conversation. The argument is
+            not mutated.
+        max_prompt_tokens: Upper bound on the number of prompt tokens fed
+            to the model.
+        max_new_tokens: Upper bound on the number of generated tokens.
+
+    Returns:
+        ``(reply, new_history)`` where ``new_history`` is a new list ending
+        with ``(user_input, reply)``. If generation raises, ``reply`` holds
+        the error text so the failure is shown in the chat.
+    """
+    # Work on a copy so the caller's list (e.g. UI session state) is untouched.
+    history = list(history or [])
+    # Assemble a plain-text transcript that ends with the cue for the model.
+    past_turns = "".join(
+        f"User: {turn[0]}\nAssistant: {turn[1]}\n" for turn in history
+    )
+    full_input = f"{past_turns}User: {user_input}\nAssistant:"
+    encoded = tokenizer(full_input, return_tensors="pt")
+    # Keep the *tail* of an over-long prompt: the newest message and the
+    # trailing "Assistant:" cue must survive, so the oldest tokens (including
+    # any leading special token) are the ones dropped.
+    # The model was loaded with device_map="auto", so inputs follow the model
+    # (model.device) instead of moving the model on every call.
+    input_ids = encoded["input_ids"][:, -max_prompt_tokens:].to(model.device)
+    attention_mask = encoded["attention_mask"][:, -max_prompt_tokens:].to(model.device)
+    try:
+        with torch.no_grad():
+            output = model.generate(
+                input_ids,
+                attention_mask=attention_mask,
+                max_new_tokens=max_new_tokens,
+                do_sample=True,
+                temperature=0.7,
+                # Explicit pad id for tokenizers that define no pad token.
+                pad_token_id=tokenizer.eos_token_id,
+            )
+        # Decode only the newly generated tokens; searching the full decoded
+        # text for "Assistant:" picks the wrong span whenever that marker
+        # appears again in the output.
+        new_tokens = output[0][input_ids.shape[-1]:]
+        reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
+        # The model may keep writing the transcript; keep only its own turn.
+        reply = reply.split("\nUser:")[0].strip()
+    except Exception as e:
+        # UI boundary: surface the failure as the reply rather than crash.
+        reply = f"⚠️ Error: {str(e)}"
+    history.append((user_input, reply))
+    return reply, history
+# 🧙🏾‍♂️ Launch Gradio Chat Interface
+# Build the chat page: transcript view, message box, clear button and
+# per-session conversation state.
+with gr.Blocks(title="🦙 Llama3-8B-1.58 Chatbot") as demo:
+    gr.Markdown("## 🦙 Llama3-8B-1.58 Chatbot\nChat with a super-efficient 1-bit model!")
     chatbot = gr.Chatbot()
+    msg = gr.Textbox(label="Your message", placeholder="Ask me anything...")
+    clear = gr.Button("Clear")
     state = gr.State([])
+    def respond(user_message, history):
+        """Handle a submitted message.
+
+        Returns ``(textbox_value, chatbot_pairs, state_pairs)``. The textbox
+        value is always ``""`` so the input is cleared after each submit.
+        """
+        # Skip generation for empty / whitespace-only submissions.
+        if not user_message or not user_message.strip():
+            return "", history, history
+        _, new_history = chat(user_message, history)
+        return "", new_history, new_history
+    # msg is both an input and an output so it can be reset after sending.
+    msg.submit(respond, [msg, state], [msg, chatbot, state])
+    clear.click(lambda: ([], []), None, [chatbot, state])
+demo.launch(share=True, debug=True)