Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test script specifically for the English articles collection implementation. | |
| """ | |
| import os | |
| import sys | |
# Put the directory that contains this script at the front of the module
# search path, so imports are resolved against it first.
current_dir = os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0, current_dir)
def test_english_articles_collection():
    """Fetch articles from the dedicated English collection and report on them.

    Creates a ``FirebaseNewsLoader``, requests articles through
    ``fetch_english_articles(limit=20)`` and prints a short report for the
    first five. An article counts as valid when its content is longer than
    100 characters. A missing (``None``) title or content is reported as
    empty text instead of aborting the whole run.

    Returns:
        The fetched articles, or an empty list when nothing was fetched or
        when any step raised (the error and its traceback are printed, not
        re-raised).
    """
    print("π― Testing English Articles Collection Implementation")
    print("=" * 70)
    try:
        # Imported inside the try block, so an import failure is handled by
        # the except clause below like any other error.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        loader = FirebaseNewsLoader()
        print(f"β Firebase loader initialized - Project: {loader.project_id}")
        print(f"π English articles collection: {loader.config.ENGLISH_ARTICLES_COLLECTION}")

        print("\nπ Testing dedicated English articles collection...")
        english_articles = loader.fetch_english_articles(limit=20)
        if not english_articles:
            print("β No articles fetched from English collection")
            return []

        print(f"β Successfully fetched {len(english_articles)} articles from English collection")

        valid_articles = 0
        total_content_length = 0
        sample = english_articles[:5]  # only the first five are inspected
        for i, article in enumerate(sample):
            # Normalise missing fields up front: the validity check below
            # already tolerates empty content, so the prints before it must
            # not raise on None.
            title = article.title or ""
            content = article.content or ""

            print(f"\nπ Article {i+1}:")
            print(f" Title: {title[:80]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {article.url}")
            print(f" Source: {article.source}")
            print(f" Language: {getattr(article, 'language', 'not set')}")

            if len(content) > 100:
                valid_articles += 1
                total_content_length += len(content)
                # Flatten newlines so the preview stays on one line.
                content_preview = content[:200].replace('\n', ' ')
                print(f" Preview: {content_preview}...")
            else:
                print(" β οΈ Insufficient content!")

        print("\nπ Quality Analysis:")
        print(f" Valid articles: {valid_articles}/{len(sample)}")
        # max(..., 1) avoids dividing by zero when no article was valid.
        print(f" Average content length: {total_content_length // max(valid_articles, 1)} chars")
        return english_articles
    except Exception as e:
        print(f"β English articles test failed: {e}")
        import traceback
        traceback.print_exc()
        return []
| def test_vector_storage_with_english_articles(articles): | |
| """Test vector storage with English articles.""" | |
| print("\nπ Testing Vector Storage with English Articles") | |
| print("=" * 70) | |
| try: | |
| from cve_factchecker.retriever import VectorRetriever | |
| # Test vector retriever | |
| retriever = VectorRetriever(persist_directory="/tmp/test_english_vector") | |
| print("β Vector retriever initialized") | |
| # Test article storage | |
| print(f"\nπ¦ Testing storage of {len(articles)} English articles...") | |
| retriever.store_articles_in_vector_db(articles, clear_first=True) | |
| # Test semantic search | |
| print("\nπ Testing semantic search...") | |
| test_queries = [ | |
| "security vulnerability", | |
| "cyber attack", | |
| "software bug", | |
| "data breach", | |
| "malware" | |
| ] | |
| for query in test_queries[:3]: # Test first 3 queries | |
| results = retriever.search(query, k=3) | |
| print(f" Query: '{query}' -> {len(results)} results") | |
| if results: | |
| for i, doc in enumerate(results[:2]): # Show first 2 | |
| content_preview = doc.page_content[:100].replace('\n', ' ') | |
| print(f" {i+1}. {content_preview}...") | |
| return True | |
| except Exception as e: | |
| print(f"β Vector storage test failed: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return False | |
def test_complete_system_with_english_collection():
    """Run ingestion and fact checking end to end on the English collection.

    Builds a ``FactCheckSystem``, calls ``ingest_firebase`` for the
    ``english_articles`` collection and prints the returned result. When the
    result reports success with at least one synced article, the first two
    sample claims are passed to ``fact_check`` and their outcome is printed.

    Returns:
        The ``success`` entry of the ingestion result (False when absent),
        or False when any step raised (the error and traceback are printed).
    """
    print("\nπ Testing Complete System with English Collection")
    print("=" * 70)
    try:
        from cve_factchecker.orchestrator import FactCheckSystem

        system = FactCheckSystem(vector_dir="/tmp/test_english_system")
        print("β Fact check system initialized")

        print("\nπ Testing English articles ingestion...")
        result = system.ingest_firebase(
            collection="english_articles",
            limit=15,
            language="English",
        )

        print("π Ingestion result:")
        print(f" Success: {result.get('success', False)}")
        print(f" Articles synced: {result.get('synced', 0)}")
        print(f" Collection: {result.get('collection', 'unknown')}")
        print(f" Language: {result.get('language', 'unknown')}")
        print(f" Message: {result.get('message', 'none')}")
        error = result.get('error')
        if error:
            print(f" Error: {error}")

        # Fact checking only makes sense once something was ingested.
        ingested = result.get("success") and result.get("synced", 0) > 0
        if ingested:
            print("\nπ§ Testing fact checking with English articles...")
            sample_claims = (
                "A new critical security vulnerability was discovered",
                "Malware attacks are increasing in frequency",
                "Software companies are improving their security measures",
            )
            # Only the first two claims are checked.
            for claim in sample_claims[:2]:
                print(f"\n Testing claim: '{claim}'")
                outcome = system.fact_check(claim)
                print(f" Verdict: {outcome.get('verdict', 'Unknown')}")
                print(f" Confidence: {outcome.get('confidence', 0)}")
                print(f" Sources used: {outcome.get('sources_used', 0)}")
                reasoning = outcome.get('reasoning', 'No reasoning provided')
                print(f" Reasoning: {reasoning[:150]}...")

        return result.get("success", False)
    except Exception as e:
        print(f"β Complete system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run the three English-collection checks and print a summary.

    The vector storage check only runs when the first check returned
    articles; the system integration check always runs.

    Returns:
        A truthy value only when articles were fetched and both the vector
        storage and the system integration checks reported success.
    """
    print("π― CVE Fact Checker - English Articles Collection Test Suite")
    print("=" * 80)

    # Check 1: fetch from the English articles collection.
    articles = test_english_articles_collection()

    # Check 2: vector storage needs articles to work with.
    if not articles:
        print("\nβ οΈ Skipping vector storage test - no articles loaded")
        vector_success = False
    else:
        vector_success = test_vector_storage_with_english_articles(articles)

    # Check 3: complete system integration.
    system_success = test_complete_system_with_english_collection()

    print("\nπ Test Results Summary")
    print("=" * 80)
    collection_mark = 'β PASS' if articles else 'β FAIL'
    vector_mark = 'β PASS' if vector_success else 'β FAIL'
    system_mark = 'β PASS' if system_success else 'β FAIL'
    print(f"English Collection: {collection_mark} ({len(articles)} articles)")
    print(f"Vector Storage: {vector_mark}")
    print(f"System Integration: {system_mark}")

    overall_success = bool(articles) and vector_success and system_success
    verdict = 'β ALL TESTS PASSED' if overall_success else 'β SOME TESTS FAILED'
    print(f"\nOverall Result: {verdict}")

    if not overall_success:
        print("\nβ οΈ Some issues detected. Check the error messages above.")
    else:
        print("\nπ English articles collection is working correctly!")
        print("π‘ The system is ready for production deployment.")
    return overall_success
# Script entry point: run the suite and map its result to the process exit
# status (0 when main() returns a truthy value, 1 otherwise).
if __name__ == "__main__":
    sys.exit(int(not main()))