NLPGenius's picture
fix firebase issues
186fe46
raw
history blame
3.14 kB
from __future__ import annotations
from typing import Dict, Any, Optional
from .config import load_openrouter_config
from .retriever import VectorNewsRetriever
from .analyzer import QueryRewriter, ClaimAnalyzer
from .firebase_loader import FirebaseNewsLoader
class FactCheckSystem:
    """End-to-end fact-checking pipeline.

    Combines three collaborators: a Firebase loader that supplies news
    articles, a vector retriever that stores/searches them semantically,
    and an LLM-backed rewriter + analyzer that judge a claim against the
    retrieved evidence.
    """

    def __init__(self, api_key: Optional[str] = None, vector_dir: str = "./vector_db"):
        """Wire up configuration and all pipeline components.

        Args:
            api_key: Optional OpenRouter key; falls back to config defaults.
            vector_dir: Directory used by the vector store for persistence.
        """
        self.cfg = load_openrouter_config(api_key)
        self.retriever = VectorNewsRetriever(persist_directory=vector_dir)
        self.rewriter = QueryRewriter(self.cfg)
        self.analyzer = ClaimAnalyzer(self.cfg)
        self.firebase = FirebaseNewsLoader()

    def ingest_firebase(self, collection: str = "english_articles", limit: int = 5000, language: str = "English") -> Dict[str, Any]:
        """Load articles from Firebase and store in vector DB with enhanced English articles support."""
        try:
            # Dedicated English collection gets a fast path; anything else
            # goes through the generic language-filtered fetch.
            english_path = language.lower() in ("english", "en") and collection == "english_articles"
            if english_path:
                print(f"🎯 Fetching from dedicated English articles collection...")
                articles = self.firebase.fetch_english_articles(limit=limit)
            else:
                print(f"πŸ” Fetching from '{collection}' collection with language filter '{language}'...")
                articles = self.firebase.fetch_articles(limit=limit, language=language)

            if not articles:
                return {
                    "synced": 0,
                    "collection": collection,
                    "success": False,
                    "error": f"No articles found in collection '{collection}'",
                }

            print(f"πŸ“š Processing {len(articles)} articles for vector storage...")
            # Refresh the vector store from scratch with this batch.
            self.retriever.store_articles_in_vector_db(articles, clear_first=True)
            return {
                "synced": len(articles),
                "collection": collection,
                "success": True,
                "language": language,
                "message": f"Successfully ingested {len(articles)} articles",
            }
        except Exception as exc:  # boundary: surface failure as a result dict, not a raise
            print(f"❌ Firebase ingestion error: {exc}")
            return {
                "synced": 0,
                "collection": collection,
                "success": False,
                "error": str(exc),
            }

    def fact_check(self, claim: str, k: int = 5) -> Dict[str, Any]:
        """Retrieve evidence for *claim* and return the analyzer's verdict.

        Searches with the claim itself, then broadens recall with rewritten
        query variants, deduplicating hits by URL before analysis.
        """
        evidence = self.retriever.semantic_search(claim, k=k)
        seen_urls = {article.get("url", "") for article in evidence}
        for variant in self.rewriter.rewrite(claim):
            for hit in self.retriever.semantic_search(variant, k=3):
                url = hit.get("url", "")
                if url and url not in seen_urls:
                    seen_urls.add(url)
                    evidence.append(hit)
        top = evidence[:8]  # cap the context handed to the analyzer
        verdict = self.analyzer.analyze(claim, top)
        verdict["sources_used"] = len(top)
        verdict["retrieved_articles"] = [article.get("url", "") for article in top]
        return verdict