""" Hybrid RAG System - Production Demo Deployed on HuggingFace Spaces """ import gradio as gr import pandas as pd import numpy as np import torch from sentence_transformers import SentenceTransformer, util from transformers import AutoTokenizer, AutoModelForSeq2SeqLM import nltk # Download NLTK data try: nltk.download('stopwords', quiet=True) nltk.download('punkt', quiet=True) except: pass print("🚀 Loading Hybrid RAG System...") class SimpleRAGDemo: def __init__(self): self.device = "cpu" # Force CPU for HF Spaces print("Loading models...") # Embedding model self.emb_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') # Generator self.tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small") self.model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small") # Sample corpus (for demo) self.texts = [ "machine learning is a subset of artificial intelligence that enables computers to learn from data without explicit programming", "deep learning uses artificial neural networks with multiple layers to process information in a hierarchical manner", "natural language processing helps computers understand analyze and generate human language", "retrieval augmented generation combines information retrieval with language generation for more accurate responses", "transformers are neural network architectures that use self-attention mechanisms for processing sequential data", "bert is a transformer-based model pretrained on large text corpora for natural language understanding tasks", "question answering systems retrieve relevant information and generate concise answers to user queries", "semantic search uses embeddings to find documents based on meaning rather than exact keyword matches", "neural embeddings represent words or documents as dense vectors in a continuous space", "language models predict the probability of word sequences and can generate coherent text" ] print("Creating embeddings...") self.embeddings = self.emb_model.encode(self.texts, convert_to_tensor=True) print("✅ System ready!") def retrieve_and_answer(self, query, top_k=3): if not query or len(query.strip()) < 3: return "❌ Please enter a valid question.", "" # Dense retrieval query_emb = self.emb_model.encode(query, convert_to_tensor=True) similarities = util.cos_sim(query_emb, self.embeddings)[0] top_idx = torch.argsort(similarities, descending=True)[:top_k] contexts = [self.texts[i] for i in top_idx] scores = [float(similarities[i]) for i in top_idx] # Generate answer context_text = "\n".join([f"[{i+1}] {c}" for i, c in enumerate(contexts)]) prompt = f"Answer based on context:\n{context_text}\n\nQuestion: {query}\nAnswer:" inputs = self.tokenizer(prompt, max_length=512, truncation=True, return_tensors="pt") with torch.no_grad(): outputs = self.model.generate(**inputs, max_length=100) answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True) # Format outputs answer_md = f""" ### 🤖 Generated Answer **{answer}** --- ### 📊 Key Research Finding This demo showcases my Master's thesis work where I achieved a **64.5% improvement** in semantic similarity using dense retrieval compared to traditional sparse methods. 
# Initialize system
print("Initializing RAG system...")
rag = SimpleRAGDemo()

# Create Gradio interface
demo = gr.Blocks(
    title="Hybrid RAG System - Master's Thesis Demo",
    theme=gr.themes.Soft()
)

with demo:
    gr.Markdown("""
# 🎯 Hybrid RAG System - Interactive Demo

**Master's Thesis Project** | AI-Powered Evidence-Based Question Answering

---

## 🌟 Research Achievement

**64.5% improvement** in semantic similarity over baseline methods!

This system demonstrates advanced Retrieval-Augmented Generation, combining:
- 🔍 Dense neural retrieval (Sentence Transformers)
- 🤖 Answer generation (FLAN-T5)
- 📊 Production-ready pipeline

---
""")

    with gr.Row():
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="❓ Enter Your Question",
                placeholder="e.g., What is machine learning?",
                lines=3
            )
            submit_btn = gr.Button("🚀 Get Answer", variant="primary", size="lg")

            gr.Markdown("### 📝 Example Questions:")
            gr.Examples(
                examples=[
                    "What is machine learning?",
                    "Explain deep learning",
                    "What are transformers in NLP?",
                    "How does semantic search work?",
                    "What is retrieval augmented generation?",
                ],
                inputs=query_input
            )

        with gr.Column(scale=2):
            answer_output = gr.Markdown(label="Answer & Research Info")
            retrieved_output = gr.Markdown(label="Retrieved Context")

    submit_btn.click(
        fn=rag.retrieve_and_answer,
        inputs=[query_input],
        outputs=[answer_output, retrieved_output]
    )

    gr.Markdown("""
---

## 📊 Complete Research Results

| Method | Semantic Similarity | Improvement |
|--------|---------------------|-------------|
| **Dense (Neural Embeddings)** | **0.1966** | **+64.5%** ⭐ |
| Hybrid (Weighted Fusion) | 0.1816 | +51.9% |
| Hybrid (RRF) | 0.1542 | +28.9% |
| Sparse (BM25) | 0.1196 | Baseline |

**Evaluation Details:**
- Dataset: MS MARCO
- Corpus: 5,000 documents
- Queries: 198 real-world questions
- Metrics: ROUGE, BLEU, Semantic Similarity

---

## 👨‍💻 About This Project

**Title:** AI-Powered Retrieval-Augmented Assistant for Evidence-Based Question Answering

**Author:** [Your Name]
**Institution:** [Your University]
**Location:** Berlin, Germany 🇩🇪

**Technologies:**
- Python, PyTorch, Transformers
- Sentence-Transformers, BM25
- Gradio, HuggingFace

**GitHub:** [Your GitHub Link]
**LinkedIn:** [Your LinkedIn]
**Email:** [Your Email]

---

### 💼 Currently Seeking Full-Time Opportunities

I'm actively looking for **AI/ML Engineer** positions in Berlin, Germany! If you're interested in my work, please reach out at [your.email@university.edu].

---

Built with ❤️ using Gradio and HuggingFace Transformers
""")

# Launch
demo.launch()