# Spaces: Running  (page-status header carried over from the web export; not part of the script)
import os
import time

from cve_factchecker.retriever import VectorNewsRetriever
from cve_factchecker.models import NewsArticle
# Keep this test lightweight and isolated: request dummy embeddings and point
# the vector store at a dedicated directory, resolved to an absolute path
# against the current working directory.
# NOTE(review): these variables are set after the cve_factchecker imports, so
# they only take effect if the package reads them later than import time
# (e.g. when the retriever is constructed) - confirm against the package.
os.environ["USE_DUMMY_EMBEDDINGS"] = "true"
os.environ["VECTOR_PERSIST_DIR"] = os.path.abspath("./vector_db_hybrid_test")
# One scrape timestamp (epoch seconds rendered as a string) shared by every
# fixture, so all sample articles carry the same value.
_SCRAPED_AT = str(int(time.time()))

# Sample corpus for the retrieval check: three security reports about Bannu /
# Khyber-Pakhtunkhwa and one sports story on a different topic (presumably a
# negative control for ranking - confirm intent with the test author).
articles = [
    NewsArticle(
        title="Militants storm FC lines in Bannu",
        content=(
            "At least five militants attacked the Frontier Corps (FC) Lines in Bannu, Khyber-Pakhtunkhwa. "
            "Security forces responded swiftly, and the situation is under control."
        ),
        url="https://tribune.com.pk/story/2564614/militants-storm-fc-lines-in-bannu",
        source="The Express Tribune",
        published_date="2025-09-15",
        scraped_date=_SCRAPED_AT,
        article_id="a1",
        language="English",
    ),
    NewsArticle(
        title="Six soldiers martyred; five terrorists killed in Bannu FC compound attack",
        content=(
            # Trailing space added: the two adjacent literals are joined
            # verbatim and previously produced "soldiers.Reports".
            "An attack on the FC compound in Bannu resulted in the martyrdom of six soldiers. "
            "Reports indicate five terrorists were killed in the exchange."
        ),
        url="https://dailytimes.com.pk/1363459/six-soldiers-martyred-five-terrorists-killed-in-attack-on-bannu-fc-compound/",
        source="Daily Times",
        published_date="2025-09-15",
        scraped_date=_SCRAPED_AT,
        article_id="a2",
        language="English",
    ),
    NewsArticle(
        title="KP operations update: militants neutralized",
        content=(
            "Security operations in Khyber-Pakhtunkhwa neutralized multiple militants. The Frontier Corps participated "
            "in the operations across the province."
        ),
        url="https://dailytimes.com.pk/1368975/31-indian-backed-militants-killed-in-kp-operations/",
        source="Daily Times",
        published_date="2025-09-16",
        scraped_date=_SCRAPED_AT,
        article_id="a3",
        language="English",
    ),
    NewsArticle(
        title="Sports: Cricket series announced",
        content="Pakistan Cricket Board announced a new bilateral series in Lahore next month.",
        url="https://example.com/sports/cricket-series",
        source="Example Sports",
        published_date="2025-09-10",
        scraped_date=_SCRAPED_AT,
        article_id="a4",
        language="English",
    ),
]
def format_result(rank, result):
    """Return the two console lines that describe one search hit.

    Args:
        rank: Position of the hit in the result list (the caller counts
            from 1).
        result: Mapping with optional ``title``, ``url``, ``source`` and
            ``content`` keys. Missing title/url/source render as ``None``;
            missing or ``None`` content yields an empty snippet.

    Returns:
        A ``(summary, snippet)`` tuple of strings, in print order.
    """
    # Cap the preview at 120 characters, then flatten newlines so the
    # snippet stays on a single console line.
    snippet = (result.get("content", "") or "")[:120].replace("\n", " ")
    summary = (
        f"{rank}. {result.get('title')} | {result.get('url')} "
        f"| source={result.get('source')}"
    )
    return summary, f" Snippet: {snippet}..."


def main():
    """Index the sample articles, run one query, and print the hits and counts."""
    retriever = VectorNewsRetriever(persist_directory=os.environ["VECTOR_PERSIST_DIR"])
    # clear_first=True is passed on every run (presumably it empties the store
    # before inserting - confirm in VectorNewsRetriever).
    retriever.store_articles_in_vector_db(articles, clear_first=True)

    # The query repeats the opening sentence of the first sample article.
    query = (
        "At least five militants attacked the Frontier Corps (FC) Lines in Bannu, Khyber-Pakhtunkhwa"
    )

    # Single source of truth for k so the header and the call cannot drift.
    top_k = 5
    print(f"\n=== Hybrid Retrieval Results (k={top_k}) ===")
    results = retriever.semantic_search(query, k=top_k)
    for rank, hit in enumerate(results, 1):
        for line in format_result(rank, hit):
            print(line)

    # Basic sanity checks
    print("\nCounts:")
    print("vector_count:", retriever.get_vector_count())
    print("results_count:", len(results))


if __name__ == "__main__":
    main()