from __future__ import annotations

import os
from typing import Dict, Any, Optional, List

from .firebase_loader import FirebaseNewsLoader
from .retriever import VectorNewsRetriever


class FirebaseVectorSync:
    """Copy news articles from Firebase into the vector store.

    The class wires a ``FirebaseNewsLoader`` (source) to a
    ``VectorNewsRetriever`` (destination) and exposes a few convenience
    entry points for limited or unlimited synchronisation runs.
    """

    # Value of ``vector_persist_dir`` that triggers directory auto-detection.
    _DEFAULT_PERSIST_DIR = "./vector_db"
    # Parent directories probed, in priority order, during auto-detection.
    _PERSIST_ROOTS = ("/data", "/tmp")
    # Collection tried when the requested one yields no articles.
    _FALLBACK_COLLECTION = "Articles"

    def __init__(self, vector_persist_dir: str = "./vector_db"):
        """Create the loader and the retriever.

        Args:
            vector_persist_dir: Directory handed to ``VectorNewsRetriever`` as
                ``persist_directory``. When left at the default
                ``"./vector_db"`` (also when that exact string is passed
                explicitly), the first usable directory among ``/data`` and
                ``/tmp`` is used as the parent instead; this auto-detection
                targets Hugging Face Spaces. If neither is usable the default
                is kept unchanged.
        """
        if vector_persist_dir == self._DEFAULT_PERSIST_DIR:
            for root in self._PERSIST_ROOTS:
                # Mere existence is not enough: the root must be a directory
                # in which new entries can be created (write + search
                # permission), otherwise the store would fail on first write.
                if os.path.isdir(root) and os.access(root, os.W_OK | os.X_OK):
                    vector_persist_dir = f"{root}/vector_db"
                    break

        self.firebase_loader = FirebaseNewsLoader()
        self.vector_retriever = VectorNewsRetriever(
            persist_directory=vector_persist_dir
        )

    def analyze_firebase_schema(self, collection: str = "Articles") -> Dict[str, Any]:
        """Return the loader's schema analysis for ``collection``.

        This is a direct pass-through to ``FirebaseNewsLoader.analyze_schema``.
        """
        return self.firebase_loader.analyze_schema(collection)

    def sync_from_firebase(
        self,
        collection_name: str = "Articles",
        limit: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Load articles from Firebase and store them in the vector database.

        If ``collection_name`` yields nothing and it is not already
        ``"Articles"``, the ``"Articles"`` collection is tried once as a
        fallback.

        Args:
            collection_name: Firebase collection to read from.
            limit: Maximum number of articles, forwarded to the loader as-is.
                ``None`` is meant to fetch everything (the loader is expected
                to paginate; that logic is not visible from this module).

        Returns:
            On success ``{"synced": <count>, "collection": <name>,
            "success": True}``, where ``collection`` names the collection that
            actually supplied the articles. When no articles were found:
            ``{"error": "No articles found", "synced": 0, "success": False}``.
        """
        articles = self.firebase_loader.load_news_articles(collection_name, limit)

        if not articles and collection_name != self._FALLBACK_COLLECTION:
            articles = self.firebase_loader.load_news_articles(
                self._FALLBACK_COLLECTION, limit
            )
            if articles:
                # Report the collection the data really came from.
                collection_name = self._FALLBACK_COLLECTION

        if not articles:
            return {"error": "No articles found", "synced": 0, "success": False}

        self.vector_retriever.store_articles_in_vector_db(articles)
        return {
            "synced": len(articles),
            "collection": collection_name,
            "success": True,
        }

    def quick_sync(self, limit: int = 100) -> Dict[str, Any]:
        """Sync at most ``limit`` articles from the default collection."""
        return self.sync_from_firebase(limit=limit)

    def full_sync(self) -> Dict[str, Any]:
        """Sync the default collection without an article limit."""
        return self.sync_from_firebase(limit=None)

    def list_firebase_collections(self) -> List[Dict[str, Any]]:
        """Return the loader's ``get_collections_info()`` result unchanged."""
        return self.firebase_loader.get_collections_info()