from __future__ import annotations

from typing import Any, Dict, Optional

from .config import load_openrouter_config
from .retriever import VectorNewsRetriever
from .analyzer import QueryRewriter, ClaimAnalyzer
from .firebase_loader import FirebaseNewsLoader


class FactCheckSystem:
    """Retrieval-augmented fact-checking pipeline.

    Ingests news articles from Firebase into a persistent vector store, then
    answers claims by retrieving semantically related articles (with query
    rewriting for recall) and delegating the verdict to a claim analyzer.
    """

    def __init__(self, api_key: Optional[str] = None, vector_dir: str = "./vector_db"):
        """Wire up the config, retriever, rewriter, analyzer and Firebase loader.

        Args:
            api_key: Optional OpenRouter API key; ``load_openrouter_config``
                presumably falls back to environment/config when None — TODO confirm.
            vector_dir: Directory where the vector DB persists its index.
        """
        cfg = load_openrouter_config(api_key)
        self.cfg = cfg
        self.retriever = VectorNewsRetriever(persist_directory=vector_dir)
        self.rewriter = QueryRewriter(cfg)
        self.analyzer = ClaimAnalyzer(cfg)
        self.firebase = FirebaseNewsLoader()

    def ingest_firebase(
        self,
        collection: str = "english_articles",
        limit: int = 5000,
        language: str = "English",
    ) -> Dict[str, Any]:
        """Load articles from Firebase and store in vector DB with enhanced English articles support.

        Args:
            collection: Firebase collection to read from.
            limit: Maximum number of articles to fetch.
            language: Language filter passed to the generic fetch path.

        Returns:
            A status dict with keys ``synced``, ``collection``, ``success``,
            plus ``language``/``message`` on success or ``error`` on failure.
        """
        try:
            # The dedicated English collection has a specialized fetch path.
            if language.lower() in ["english", "en"] and collection == "english_articles":
                print(f"🎯 Fetching from dedicated English articles collection...")
                arts = self.firebase.fetch_english_articles(limit=limit)
            else:
                print(f"🔍 Fetching from '{collection}' collection with language filter '{language}'...")
                arts = self.firebase.fetch_articles(limit=limit, language=language)

            if not arts:
                return {
                    "synced": 0,
                    "collection": collection,
                    "success": False,
                    "error": f"No articles found in collection '{collection}'",
                }

            print(f"📚 Processing {len(arts)} articles for vector storage...")
            # Clear and refresh vector store so stale articles don't linger.
            self.retriever.store_articles_in_vector_db(arts, clear_first=True)

            return {
                "synced": len(arts),
                "collection": collection,
                "success": True,
                "language": language,
                "message": f"Successfully ingested {len(arts)} articles",
            }
        except Exception as e:
            # Boundary method: report failure as data rather than raising, so
            # callers always receive a uniform status dict.
            print(f"❌ Firebase ingestion error: {e}")
            return {
                "synced": 0,
                "collection": collection,
                "success": False,
                "error": str(e),
            }

    def fact_check(self, claim: str, k: int = 5, max_sources: int = 8) -> Dict[str, Any]:
        """Fact-check ``claim`` against the article vector store.

        Retrieves ``k`` articles for the raw claim, augments the pool with up
        to 3 hits per rewritten query (deduplicated by URL), and forwards the
        first ``max_sources`` articles to the analyzer.

        Args:
            claim: The statement to verify.
            k: Number of articles fetched by the initial semantic search.
            max_sources: Cap on articles handed to the analyzer. New
                backward-compatible parameter; the default preserves the
                previous hard-coded limit of 8.

        Returns:
            The analyzer's result dict, extended with ``sources_used`` (count)
            and ``retrieved_articles`` (list of source URLs).
        """
        base = self.retriever.semantic_search(claim, k=k)
        urls = {a.get("url", "") for a in base}
        for q in self.rewriter.rewrite(claim):
            for m in self.retriever.semantic_search(q, k=3):
                u = m.get("url", "")
                # Skip duplicates and results that carry no URL.
                if u and u not in urls:
                    base.append(m)
                    urls.add(u)
        # Hoist the slice once instead of re-slicing base[:8] three times.
        top = base[:max_sources]
        result = self.analyzer.analyze(claim, top)
        result["sources_used"] = len(top)
        result["retrieved_articles"] = [a.get("url", "") for a in top]
        return result