Spaces:

timothytzkung
/

SFU-IT-Chatbot-V2

Sleeping

App Files Files Community

timothytzkung committed 14 days ago

Commit

e93d19d

verified ·

1 Parent(s): f129043

Update app.py

Browse files

- Added optimizations

Files changed (1) hide show

app.py +70 -104

app.py CHANGED Viewed

@@ -1,158 +1,124 @@
 import json
 import numpy as np
 import pandas as pd
-from transformers import pipeline
 from sentence_transformers import SentenceTransformer
 import gradio as gr
 import torch
 from huggingface_hub import login
 import os
-# Sanity Check
 hf_token = os.getenv("V2_TOKEN")
 if hf_token is None:
-    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")
-# Explicit login
 login(token=hf_token)
-# --- Configuration ---
-print("Loading RAG system on your device...")
-# Load Knowledge base
 FILE_PATH = "data.jsonl"
 PRELOAD_FILE_PATH = "preload-data.json"
-# Load data
-print(f"Found Preloaded Data! Using {PRELOAD_FILE_PATH}...")
-with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
-    data = json.load(f)
-# Set data
-documents = data
-# Embeddings
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-embeddings = embedding_model.encode(documents, convert_to_numpy=True)
-# Use pandas dataframe
-df = pd.DataFrame(
-    {
-        "Document": documents,
-        "Embedding": list(embeddings),  # store as list
-    }
-)
-# Load LLM Pipeline
-llm = pipeline(
-    "text-generation",
-    model="google/gemma-3-4b-it", # Might not have enough storage ngl
-    token=hf_token
-)
-def clean_query_with_llm(query):
-    prompt_content = f"""
-    Below is a new question asked by the user that needs to be answered by searching in a knowledge base.
-    You have access to SFU IT Knowledge Base index with 100's of chunked documents.
-    Generate a search question based the user's question.
-    If you cannot generate a search query, return just the number 0.
-    User's Question:
-    {query}
-    Search Query:
-    """
-    response = llm(
-        prompt_content,
-        max_new_tokens=100,
-        do_sample=False,
-        return_full_text=False
-    )
-    return response[0]["generated_text"].strip()
-# Retrieve w Pandas
-def retrieve_with_pandas(query: str, top_k: int = 5):
     """
-    Embed the query, compute cosine similarity to each document,
-    and return the top_k most similar documents (as a DataFrame).
     """
     query_embedding = embedding_model.encode([query])[0]
-    def cosine_sim(x):
-        x = np.array(x)
-        return float(
-            np.dot(query_embedding, x)
-            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
-        )
-    df["Similarity"] = df["Embedding"].apply(cosine_sim)
-    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
-    return results[["Document", "Similarity"]]
-def generate_with_rag(query, top_k=5):
-        # goSFU specific cleaning
     if "gosfu" in query.lower():
         query = query.replace("gosfu", "goSFU")
     # Retrieve
-    search_query = clean_query_with_llm(query)
-    results = retrieve_with_pandas(search_query)
-    # Turn the Series into a single string of text
-    # (each doc separated by a divider)
-    context_str = "\n\n---\n\n".join(results["Document"].tolist())
-    # Build a clean prompt
     prompt_content = f"""
     You are a SFU IT helpdesk chatbot.
-    Your task is to answer SFU IT related questions such as accessing various technology services or general troubleshooting.
-    Below is new question asked by the user, and related article chunks to the user question.
-    If the user asked a question, answer the user's question with short step by step instructions: consider all the articles below.
-    If there are links in the articles, provide those links in your answer.
-    If the user asked a question and the answer is not in the contexts, say that you're sorry that you can't help them and suggest contacting SFU IT at 778-782-8888 or by submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
-    If the user DID NOT ask a question, be friendly and ask how you can help them.
-    Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
-    If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
-    Do not ask the user any follow-up questions after answering them.
-Question:
-{query}
--- Start of Articles --
-{context_str}
--- End of Articles --
-Answer:"""
-    # Call the LLM
     response = llm(
         prompt_content,
-        max_new_tokens=500,
         do_sample=False,
         return_full_text=False
     )
     return response[0]["generated_text"].strip()
 def chat_fn(message, history):
-    """
-    Chat Interface callback
-    """
-    answer = generate_with_rag(message, top_k=5)
-    return answer
 demo = gr.ChatInterface(
     fn=chat_fn,
-    title="SFU IT Chatbot",
     description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
 )
-# share=True
 if __name__ == "__main__":
     demo.launch()

 import json
 import numpy as np
 import pandas as pd
+from transformers import pipeline, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer
 import gradio as gr
 import torch
 from huggingface_hub import login
 import os
+# --- Setup & Configuration ---
 hf_token = os.getenv("V2_TOKEN")
 if hf_token is None:
+    raise RuntimeError("V2_TOKEN environment variable is not set.")
 login(token=hf_token)
+PRELOAD_PARQUET = "preload.parquet"
+print("Loading RAG system...")
+# --- Load Data & Pre-compute Embeddings ---
+# optimization: Ensure we aren't re-embedding every restart if possible.
 FILE_PATH = "data.jsonl"
 PRELOAD_FILE_PATH = "preload-data.json"
+# Load Embedding Model
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# Pre-calculate embeddings once and stack them into a numpy matrix for fast math nyoom
+if not os.path.exists(PRELOAD_PARQUET):
+    print(f"Loading data from {PRELOAD_FILE_PATH}...")
+    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
+        documents = json.load(f)
+    print("Generating/Loading embeddings...")
+    doc_embeddings = embedding_model.encode(documents, convert_to_numpy=True)
+    # Normalize embeddings now so only need dot product later (faster than cosine calc every time I guess)
+    doc_embeddings = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
+    # Create DataFrame just for text storage (we will use numpy for math)
+    df = pd.DataFrame({"Document": documents})
+    tmp = df.to_parquet(PRELOAD_PARQUET)
+else:
+    print("Parquet found!")
+    df = pd.read_parquet(PRELOAD_PARQUET, engine='fastparquet')
+print("Parquet established.")
+print("Loading LLM...")
+llm = pipeline(
+    "text-generation",
+    model="google/gemma-3-1b-it",
+    token=hf_token,
+)
+# --- Optimized Retrieval Function ---
+def retrieve_vectorized(query: str, top_k: int = 5):
     """
+    Uses Matrix Multiplication instead of Row-by-Row iteration.
     """
+    # Encode query
     query_embedding = embedding_model.encode([query])[0]
+    # Normalize query
+    query_norm = query_embedding / np.linalg.norm(query_embedding)
+    scores = np.dot(doc_embeddings, query_norm)
+    top_indices = np.argsort(scores)[::-1][:top_k]
+    # Retrieve documents
+    results = df.iloc[top_indices].copy()
+    return results["Document"].tolist()
+# --- Main Generation Function ---
+def generate_with_rag(query):
+    # goSFU specific cleaning
     if "gosfu" in query.lower():
         query = query.replace("gosfu", "goSFU")
     # Retrieve
+    retrieved_docs = retrieve_vectorized(query, top_k=5)
+    context_str = "\n\n---\n\n".join(retrieved_docs)
+    # Prompt
     prompt_content = f"""
     You are a SFU IT helpdesk chatbot.
+    Your task is to answer SFU IT related questions.
+    Context Articles:
+    {context_str}
+    User Question: {query}
+    Instructions:
+    1. Answer the question using ONLY the Context Articles above.
+    2. Provide step-by-step instructions and include relevant links found in the text.
+    3. If the answer is not in the context, suggest contacting SFU IT at 778-782-8888.
+    4. If the user is asking about mental health, redirect to SFU Health & Counselling.
+    Answer:"""
     response = llm(
         prompt_content,
+        max_new_tokens=300, # Reduced token count for speed
         do_sample=False,
         return_full_text=False
     )
     return response[0]["generated_text"].strip()
 def chat_fn(message, history):
+    return generate_with_rag(message)
 demo = gr.ChatInterface(
     fn=chat_fn,
+    title="SFU IT Chatbot (Optimized)",
     description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
 )
 if __name__ == "__main__":
     demo.launch()