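"""Fact-checking pipeline that ingests news articles from Firebase into a
vector store and checks claims against the retrieved evidence."""
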
from __future__ import annotations

from typing import Dict, Any, Optional

from .config import load_openrouter_config
from .retriever import VectorNewsRetriever
from .analyzer import QueryRewriter, ClaimAnalyzer
from .firebase_loader import FirebaseNewsLoader

class FactCheckSystem:
    def __init__(self, api_key: Optional[str] = None, vector_dir: str = "./vector_db"):
        cfg = load_openrouter_config(api_key)
        self.cfg = cfg
        self.retriever = VectorNewsRetriever(persist_directory=vector_dir)
        self.rewriter = QueryRewriter(cfg)
        self.analyzer = ClaimAnalyzer(cfg)
        self.firebase = FirebaseNewsLoader()

    def ingest_firebase(self, collection: str = "english_articles", limit: int = 5000, language: str = "English") -> Dict[str, Any]:
        """Load articles from Firebase and store them in the vector DB,
        with support for the dedicated English articles collection."""
        try:
            # Prefer the dedicated English articles collection when applicable
            if language.lower() in ["english", "en"] and collection == "english_articles":
                print("Fetching from dedicated English articles collection...")
                arts = self.firebase.fetch_english_articles(limit=limit)
            else:
                print(f"Fetching from '{collection}' collection with language filter '{language}'...")
                arts = self.firebase.fetch_articles(limit=limit, language=language)

            if not arts:
                return {
                    "synced": 0,
                    "collection": collection,
                    "success": False,
                    "error": f"No articles found in collection '{collection}'",
                }

            print(f"Processing {len(arts)} articles for vector storage...")
            # Clear and refresh the vector store
            self.retriever.store_articles_in_vector_db(arts, clear_first=True)
            return {
                "synced": len(arts),
                "collection": collection,
                "success": True,
                "language": language,
                "message": f"Successfully ingested {len(arts)} articles",
            }
        except Exception as e:
            print(f"Firebase ingestion error: {e}")
            return {
                "synced": 0,
                "collection": collection,
                "success": False,
                "error": str(e),
            }

    def fact_check(self, claim: str, k: int = 5) -> Dict[str, Any]:
        """Retrieve evidence for a claim (original query plus rewrites), then analyze it."""
        base = self.retriever.semantic_search(claim, k=k)
        urls = {a.get("url", "") for a in base}
        # Broaden retrieval with rewritten queries, deduplicating by URL
        for q in self.rewriter.rewrite(claim):
            more = self.retriever.semantic_search(q, k=3)
            for m in more:
                u = m.get("url", "")
                if u and u not in urls:
                    base.append(m)
                    urls.add(u)
        # Analyze the claim against at most 8 retrieved articles
        top = base[:8]
        result = self.analyzer.analyze(claim, top)
        result["sources_used"] = len(top)
        result["retrieved_articles"] = [a.get("url", "") for a in top]
        return result
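
# A minimal usage sketch (run with `python -m <package>.<module>` so the relative
# imports resolve). It assumes load_openrouter_config can find an API key (e.g.
# from the environment) and that Firebase credentials are already configured;
# the sample claim below is purely illustrative.
if __name__ == "__main__":
    system = FactCheckSystem(vector_dir="./vector_db")

    # Sync articles from Firebase into the local vector store
    ingest_result = system.ingest_firebase(limit=1000)
    print(ingest_result)

    # Check a sample claim against the ingested evidence
    if ingest_result["success"]:
        verdict = system.fact_check("The city council approved the new budget last week.", k=5)
        print(verdict.get("sources_used"), verdict.get("retrieved_articles"))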