Spaces:
Sleeping
Sleeping
| """ | |
| Hybrid RAG System - Production Demo | |
| Deployed on HuggingFace Spaces | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| from sentence_transformers import SentenceTransformer, util | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import nltk | |
# Download NLTK data.
# Best-effort: a failed download (for example no network access or a
# read-only data directory) must not prevent the demo from starting, so the
# failure is reported and startup continues. Only ``Exception`` is caught so
# that KeyboardInterrupt / SystemExit still propagate.
try:
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
except Exception as exc:
    print(f"Warning: NLTK data download failed ({exc!r}); continuing without it.")

print("π Loading Hybrid RAG System...")
class SimpleRAGDemo:
    """Minimal retrieval-augmented question-answering pipeline for the demo.

    The corpus is embedded once at construction time with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model. Each query is embedded,
    ranked against the corpus by cosine similarity, and the best passages are
    placed in a prompt for ``google/flan-t5-small`` to generate an answer.
    """

    # Built-in sample corpus, used when no custom texts are supplied.
    DEFAULT_TEXTS = (
        "machine learning is a subset of artificial intelligence that enables computers to learn from data without explicit programming",
        "deep learning uses artificial neural networks with multiple layers to process information in a hierarchical manner",
        "natural language processing helps computers understand analyze and generate human language",
        "retrieval augmented generation combines information retrieval with language generation for more accurate responses",
        "transformers are neural network architectures that use self-attention mechanisms for processing sequential data",
        "bert is a transformer-based model pretrained on large text corpora for natural language understanding tasks",
        "question answering systems retrieve relevant information and generate concise answers to user queries",
        "semantic search uses embeddings to find documents based on meaning rather than exact keyword matches",
        "neural embeddings represent words or documents as dense vectors in a continuous space",
        "language models predict the probability of word sequences and can generate coherent text",
    )

    def __init__(self, texts=None):
        """Load both models and pre-compute the corpus embeddings.

        Args:
            texts: Optional iterable of passages to search over. When omitted,
                the built-in ``DEFAULT_TEXTS`` sample corpus is used.
        """
        self.device = "cpu"  # Force CPU for HF Spaces
        print("Loading models...")

        # Embedding model; the device is passed explicitly so the CPU choice
        # above is actually honoured instead of being auto-detected.
        self.emb_model = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2', device=self.device
        )

        # Generator
        self.tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
        self.model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
        self.model.to(self.device)

        # Sample corpus (for demo) unless the caller provides its own.
        self.texts = list(self.DEFAULT_TEXTS if texts is None else texts)

        print("Creating embeddings...")
        self.embeddings = self.emb_model.encode(self.texts, convert_to_tensor=True)
        print("β System ready!")

    def retrieve_and_answer(self, query, top_k=3):
        """Answer ``query`` from the most similar corpus passages.

        Args:
            query: The user's question. Empty values and questions shorter
                than three non-whitespace-trimmed characters are rejected.
            top_k: Maximum number of passages to retrieve. It is clamped to
                the range ``0 .. len(self.texts)``.

        Returns:
            A ``(answer_markdown, retrieved_markdown)`` tuple of strings. For
            a rejected query the first element is an error message and the
            second is the empty string.
        """
        if not query or len(query.strip()) < 3:
            return "β Please enter a valid question.", ""
        # Surrounding whitespace carries no meaning for retrieval or prompting.
        query = query.strip()

        contexts, scores = self._retrieve(query, top_k)
        answer = self._generate(query, contexts)
        return self._format_answer(answer), self._format_retrieved(contexts, scores)

    def _retrieve(self, query, top_k):
        """Return the ``top_k`` most similar passages and their cosine scores."""
        # Clamp so a negative value cannot turn the slice below into
        # "everything except the last n" by accident.
        k = max(0, min(int(top_k), len(self.texts)))

        query_emb = self.emb_model.encode(query, convert_to_tensor=True)
        similarities = util.cos_sim(query_emb, self.embeddings)[0]
        # Convert to plain ints so list indexing does not rely on 0-d tensors.
        ranked = torch.argsort(similarities, descending=True)[:k].tolist()

        contexts = [self.texts[i] for i in ranked]
        scores = [float(similarities[i]) for i in ranked]
        return contexts, scores

    def _generate(self, query, contexts):
        """Build the prompt from ``contexts`` and decode the generated answer."""
        context_text = "\n".join(f"[{i}] {c}" for i, c in enumerate(contexts, 1))
        prompt = f"Answer based on context:\n{context_text}\n\nQuestion: {query}\nAnswer:"

        # The prompt is truncated to 512 tokens before generation.
        inputs = self.tokenizer(
            prompt, max_length=512, truncation=True, return_tensors="pt"
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_length=100)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    @staticmethod
    def _format_answer(answer):
        """Render the generated answer and the research summary as Markdown."""
        # Blank lines around "---" keep it a horizontal rule; directly under a
        # text line Markdown would treat it as a setext heading underline.
        return "\n".join([
            "",
            "### π€ Generated Answer",
            "",
            f"**{answer}**",
            "",
            "---",
            "",
            "### π Key Research Finding",
            "",
            "This demo showcases my Master's thesis work where I achieved a **64.5% improvement**",
            "in semantic similarity using dense retrieval compared to traditional sparse methods.",
            "",
            "**Evaluation:** 198 queries | 5,000 documents | MS MARCO dataset",
            "",
        ])

    @staticmethod
    def _format_retrieved(contexts, scores):
        """Render the retrieved passages with their relevance scores as Markdown."""
        entries = [
            f"**[{rank}]** Relevance: `{score:.4f}`\n\n{ctx}\n\n---\n\n"
            for rank, (ctx, score) in enumerate(zip(contexts, scores), 1)
        ]
        return "### π Retrieved Documents\n\n" + "".join(entries)
# Build the RAG pipeline once at startup; the UI callbacks below share this
# single instance.
print("Initializing RAG system...")
rag = SimpleRAGDemo()

# Top-level Gradio container that the page layout is attached to.
blocks_options = {
    "title": "Hybrid RAG System - Master's Thesis Demo",
    "theme": gr.themes.Soft(),
}
demo = gr.Blocks(**blocks_options)
# Markdown rendered above the question/answer area.
INTRO_MARKDOWN = """
# π― Hybrid RAG System - Interactive Demo
**Master's Thesis Project** | AI-Powered Evidence-Based Question Answering
---
## π Research Achievement
**64.5% improvement** in semantic similarity over baseline methods!
This system demonstrates advanced Retrieval-Augmented Generation combining:
- π Dense neural retrieval (Sentence Transformers)
- π€ Answer generation (FLAN-T5)
- π Production-ready pipeline
---
"""

# Questions offered as one-click examples under the input box.
EXAMPLE_QUESTIONS = [
    "What is machine learning?",
    "Explain deep learning",
    "What are transformers in NLP?",
    "How does semantic search work?",
    "What is retrieval augmented generation?",
]

# Markdown rendered below the question/answer area.
FOOTER_MARKDOWN = """
---
## π Complete Research Results
| Method | Semantic Similarity | Improvement |
|--------|-------------------|-------------|
| **Dense (Neural Embeddings)** | **0.1966** | **+64.5%** β |
| Hybrid (Weighted Fusion) | 0.1816 | +51.9% |
| Hybrid (RRF) | 0.1542 | +28.9% |
| Sparse (BM25) | 0.1196 | Baseline |
**Evaluation Details:**
- Dataset: MS MARCO
- Corpus: 5,000 documents
- Queries: 198 real-world questions
- Metrics: ROUGE, BLEU, Semantic Similarity
---
## π¨βπ» About This Project
**Title:** AI-Powered Retrieval-Augmented Assistant for Evidence-Based Question Answering
**Author:** [Your Name]
**Institution:** [Your University]
**Location:** Berlin, Germany π©πͺ
**Technologies:**
- Python, PyTorch, Transformers
- Sentence-Transformers, BM25
- Gradio, HuggingFace
**GitHub:** [Your GitHub Link]
**LinkedIn:** [Your LinkedIn]
**Email:** [Your Email]
---
### πΌ Currently Seeking Full-Time Opportunities
I'm actively looking for **AI/ML Engineer** positions in Berlin, Germany!
If you're interested in my work, please reach out at [[email protected]]
---
Built with β€οΈ using Gradio and HuggingFace Transformers
"""

# Page layout: intro, then a two-column row (question input on the left,
# answer and retrieved context on the right), then the results footer.
# NOTE(review): the nesting of the columns was reconstructed from a listing
# that had lost its indentation - confirm against the deployed app.
with demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="β Enter Your Question",
                placeholder="e.g., What is machine learning?",
                lines=3,
            )
            submit_btn = gr.Button("π Get Answer", variant="primary", size="lg")
            gr.Markdown("### π Example Questions:")
            gr.Examples(examples=EXAMPLE_QUESTIONS, inputs=query_input)

        with gr.Column(scale=2):
            answer_output = gr.Markdown(label="Answer & Research Info")
            retrieved_output = gr.Markdown(label="Retrieved Context")

    # Clicking the button sends the question to the pipeline and fills both
    # Markdown panels with the returned pair of strings.
    submit_btn.click(
        fn=rag.retrieve_and_answer,
        inputs=[query_input],
        outputs=[answer_output, retrieved_output],
    )

    gr.Markdown(FOOTER_MARKDOWN)
# Launch the web server only when this file is executed as a script, so that
# importing the module (for example from a test or another tool) builds
# ``demo`` without starting a server as a side effect.
if __name__ == "__main__":
    demo.launch()