#!/usr/bin/env python3
"""
Test script specifically for the English articles collection implementation.

Three stages are run in order by :func:`main`:

1. fetch articles from the dedicated English collection,
2. store those articles in a vector database and run a few searches,
3. ingest the collection through the complete fact-check system and
   fact-check sample claims.

Every stage catches its own exceptions, prints a traceback and reports
failure through its return value, so one broken stage never prevents the
remaining stages from running.
"""

import os
import sys
import traceback

# Add this script's own directory to the Python path (presumably the directory
# that contains the ``cve_factchecker`` package - confirm against repo layout).
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)

# How many fetched articles are printed and quality-checked in detail.
_SAMPLE_SIZE = 5
# An article counts as "valid" only when its content is longer than this.
_MIN_CONTENT_CHARS = 100


def _report_failure(label, exc):
    """Print the failure banner for stage *label* followed by the traceback.

    Must be called from inside an ``except`` block: ``traceback.print_exc``
    prints the exception that is currently being handled.
    """
    print(f"❌ {label} test failed: {exc}")
    traceback.print_exc()


def _preview(text, limit):
    """Return the first *limit* characters of *text* flattened to one line."""
    return text[:limit].replace('\n', ' ')


def test_english_articles_collection():
    """Test the dedicated English articles collection fetching.

    Returns:
        The fetched articles, or an empty list when nothing was fetched or
        any step raised an exception.
    """
    print("🎯 Testing English Articles Collection Implementation")
    print("=" * 70)

    try:
        # Imported lazily so a missing/broken package is reported as a test
        # failure instead of crashing the script at import time.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        # Test Firebase loader
        loader = FirebaseNewsLoader()
        print(f"✅ Firebase loader initialized - Project: {loader.project_id}")
        print(f"📚 English articles collection: {loader.config.ENGLISH_ARTICLES_COLLECTION}")

        # Test the dedicated English articles method
        print("\n🔍 Testing dedicated English articles collection...")
        english_articles = loader.fetch_english_articles(limit=20)

        if not english_articles:
            print("❌ No articles fetched from English collection")
            return []

        print(f"✅ Successfully fetched {len(english_articles)} articles from English collection")

        # Analyze only the first few articles to keep the output readable.
        sample = english_articles[:_SAMPLE_SIZE]
        valid_articles = 0
        total_content_length = 0

        for number, article in enumerate(sample, start=1):
            # Normalise before use: a None title/content must be reported as
            # insufficient content, not abort the whole test with TypeError.
            title = article.title or ""
            content = article.content or ""

            print(f"\n📄 Article {number}:")
            print(f" Title: {title[:80]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {article.url}")
            print(f" Source: {article.source}")
            print(f" Language: {getattr(article, 'language', 'not set')}")

            if len(content) > _MIN_CONTENT_CHARS:
                valid_articles += 1
                total_content_length += len(content)
                # Show content preview
                print(f" Preview: {_preview(content, 200)}...")
            else:
                print(" ⚠️ Insufficient content!")

        print("\n📊 Quality Analysis:")
        print(f" Valid articles: {valid_articles}/{len(sample)}")
        # max(..., 1) avoids division by zero when no article was valid.
        print(f" Average content length: {total_content_length // max(valid_articles, 1)} chars")

        return english_articles

    except Exception as e:
        _report_failure("English articles", e)
        return []


def test_vector_storage_with_english_articles(articles):
    """Test vector storage with English articles.

    Args:
        articles: The articles to store, as returned by
            :func:`test_english_articles_collection`.

    Returns:
        True when storage and all searches completed without raising,
        False otherwise.
    """
    print("\n🔍 Testing Vector Storage with English Articles")
    print("=" * 70)

    try:
        from cve_factchecker.retriever import VectorRetriever

        # Test vector retriever
        retriever = VectorRetriever(persist_directory="/tmp/test_english_vector")
        print("✅ Vector retriever initialized")

        # Test article storage
        print(f"\n📦 Testing storage of {len(articles)} English articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)

        # Test semantic search
        print("\n🔍 Testing semantic search...")
        test_queries = [
            "security vulnerability",
            "cyber attack",
            "software bug",
            "data breach",
            "malware",
        ]

        for query in test_queries[:3]:  # Test first 3 queries
            results = retriever.search(query, k=3)
            print(f" Query: '{query}' -> {len(results)} results")

            for rank, doc in enumerate(results[:2], start=1):  # Show first 2
                print(f" {rank}. {_preview(doc.page_content, 100)}...")

        return True

    except Exception as e:
        _report_failure("Vector storage", e)
        return False


def test_complete_system_with_english_collection():
    """Test the complete system with English articles collection.

    Returns:
        The ``success`` value of the ingestion result (False when the key is
        absent), or False when any step raised an exception.
    """
    print("\n🚀 Testing Complete System with English Collection")
    print("=" * 70)

    try:
        from cve_factchecker.orchestrator import FactCheckSystem

        # Test system initialization
        system = FactCheckSystem(vector_dir="/tmp/test_english_system")
        print("✅ Fact check system initialized")

        # Test English articles ingestion
        print("\n🔄 Testing English articles ingestion...")
        result = system.ingest_firebase(
            collection="english_articles",
            limit=15,
            language="English",
        )

        print("📊 Ingestion result:")
        print(f" Success: {result.get('success', False)}")
        print(f" Articles synced: {result.get('synced', 0)}")
        print(f" Collection: {result.get('collection', 'unknown')}")
        print(f" Language: {result.get('language', 'unknown')}")
        print(f" Message: {result.get('message', 'none')}")

        if result.get('error'):
            print(f" Error: {result.get('error')}")

        # Fact checking is only meaningful once at least one article was
        # actually ingested.
        if result.get("success") and result.get("synced", 0) > 0:
            print("\n🧠 Testing fact checking with English articles...")

            test_claims = [
                "A new critical security vulnerability was discovered",
                "Malware attacks are increasing in frequency",
                "Software companies are improving their security measures",
            ]

            for claim in test_claims[:2]:  # Test first 2 claims
                print(f"\n Testing claim: '{claim}'")
                fact_result = system.fact_check(claim)

                print(f" Verdict: {fact_result.get('verdict', 'Unknown')}")
                print(f" Confidence: {fact_result.get('confidence', 0)}")
                print(f" Sources used: {fact_result.get('sources_used', 0)}")

                reasoning = fact_result.get('reasoning', 'No reasoning provided')
                print(f" Reasoning: {reasoning[:150]}...")

        return result.get("success", False)

    except Exception as e:
        _report_failure("Complete system", e)
        return False


def main():
    """Run all English articles tests.

    Returns:
        A truthy value only when articles were fetched and both the vector
        storage and the system integration stages succeeded.
    """
    print("🎯 CVE Fact Checker - English Articles Collection Test Suite")
    print("=" * 80)

    # Test 1: English articles collection
    articles = test_english_articles_collection()

    # Test 2: Vector storage (if we have articles)
    if articles:
        vector_success = test_vector_storage_with_english_articles(articles)
    else:
        print("\n⚠️ Skipping vector storage test - no articles loaded")
        vector_success = False

    # Test 3: Complete system integration (independent of tests 1 and 2)
    system_success = test_complete_system_with_english_collection()

    # Summary
    print("\n📊 Test Results Summary")
    print("=" * 80)
    print(f"English Collection: {'✅ PASS' if articles else '❌ FAIL'} ({len(articles)} articles)")
    print(f"Vector Storage: {'✅ PASS' if vector_success else '❌ FAIL'}")
    print(f"System Integration: {'✅ PASS' if system_success else '❌ FAIL'}")

    overall_success = bool(articles) and vector_success and system_success
    print(f"\nOverall Result: {'✅ ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'}")

    if overall_success:
        print("\n🎉 English articles collection is working correctly!")
        print("💡 The system is ready for production deployment.")
    else:
        print("\n⚠️ Some issues detected. Check the error messages above.")

    return overall_success


if __name__ == "__main__":
    success = main()
    # Exit status 0 on success, 1 on failure.
    sys.exit(0 if success else 1)