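# NOTE: the following header lines are residue from the web file viewer, not module code.
# NLPGenius's picture
# Fix permission errors, rate limiting, and add English language filtering
# e06a21d
# raw
# history blame
# 1.99 kB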
from __future__ import annotations
from typing import Dict, Any, Optional
from .config import load_openrouter_config
from .retriever import VectorNewsRetriever
from .analyzer import QueryRewriter, ClaimAnalyzer
from .firebase_loader import FirebaseNewsLoader
class FactCheckSystem:
    """Fact-checking pipeline combining retrieval, query rewriting and claim analysis.

    Attributes set by ``__init__``:
        cfg: value returned by ``load_openrouter_config(api_key)``.
        retriever: ``VectorNewsRetriever`` persisted under ``vector_dir``.
        rewriter: ``QueryRewriter`` built from ``cfg``.
        analyzer: ``ClaimAnalyzer`` built from ``cfg``.
        firebase: ``FirebaseNewsLoader`` used by :meth:`ingest_firebase`.
    """

    def __init__(self, api_key: Optional[str] = None, vector_dir: str = "./vector_db"):
        """Build all collaborators.

        Args:
            api_key: Forwarded to ``load_openrouter_config``; ``None`` by default.
            vector_dir: Directory passed to the retriever as ``persist_directory``.
        """
        cfg = load_openrouter_config(api_key)
        self.cfg = cfg
        self.retriever = VectorNewsRetriever(persist_directory=vector_dir)
        self.rewriter = QueryRewriter(cfg)
        self.analyzer = ClaimAnalyzer(cfg)
        self.firebase = FirebaseNewsLoader()

    def ingest_firebase(
        self,
        collection: str = "articles",
        limit: int = 5000,
        language: str = "English",
    ) -> Dict[str, Any]:
        """Load articles from Firebase and store them in the vector DB.

        Args:
            collection: Name reported back in the result dict.
                NOTE(review): it is not passed to ``fetch_articles`` here, so
                it does not influence which articles are loaded -- confirm
                that this is intended.
            limit: Forwarded to ``fetch_articles`` as ``limit``.
            language: Forwarded to ``fetch_articles`` as ``language``.

        Returns:
            ``{"synced": 0, "collection": ..., "success": False}`` when the
            loader returns nothing; otherwise a dict with the number of
            articles stored, ``"success": True`` and the ``language`` used.
        """
        articles = self.firebase.fetch_articles(limit=limit, language=language)
        if not articles:
            # Nothing fetched: leave the existing vector store untouched.
            return {"synced": 0, "collection": collection, "success": False}

        # Clear and refresh the vector store with the freshly fetched articles.
        self.retriever.store_articles_in_vector_db(articles, clear_first=True)
        return {
            "synced": len(articles),
            "collection": collection,
            "success": True,
            "language": language,
        }

    def fact_check(
        self,
        claim: str,
        k: int = 5,
        *,
        rewrite_k: int = 3,
        max_sources: int = 8,
    ) -> Dict[str, Any]:
        """Analyze ``claim`` against articles retrieved from the vector DB.

        The claim itself is searched first; every rewritten query returned by
        the rewriter is then searched as well, and articles with a new,
        non-empty URL are appended to the candidate list.

        Args:
            claim: Text of the claim to check.
            k: Number of results requested for the search on the claim itself.
            rewrite_k: Number of results requested per rewritten query.
            max_sources: Maximum number of candidates handed to the analyzer.

        Returns:
            The object returned by ``analyzer.analyze`` (assumed to be a
            mutable dict), extended with ``"sources_used"`` and
            ``"retrieved_articles"`` (the URLs of the articles analyzed).
        """
        candidates = self.retriever.semantic_search(claim, k=k)
        seen_urls = {article.get("url", "") for article in candidates}

        for query in self.rewriter.rewrite(claim):
            for article in self.retriever.semantic_search(query, k=rewrite_k):
                url = article.get("url", "")
                # Articles without a URL cannot be de-duplicated, so skip them.
                if url and url not in seen_urls:
                    candidates.append(article)
                    seen_urls.add(url)

        # Slice once so the analyzer input and the reported metadata agree.
        selected = candidates[:max_sources]
        result = self.analyzer.analyze(claim, selected)
        result["sources_used"] = len(selected)
        result["retrieved_articles"] = [article.get("url", "") for article in selected]
        return result