Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test script to verify the Firebase loading and content processing improvements. | |
| """ | |
| import os | |
| import sys | |
# Put the directory that contains this script at the front of the import
# search path, so modules located next to it take precedence when the tests
# below import them (presumably the cve_factchecker package lives here).
current_dir = os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0, current_dir)
def test_firebase_loading():
    """Exercise the Firebase loader: a small sample fetch, then an English-only fetch.

    Progress and diagnostics are printed to stdout. Any exception raised along
    the way (including a failed import of the loader module) is caught,
    reported together with a traceback, and converted into an empty result so
    the caller can carry on with the remaining tests.

    Returns:
        The articles returned by
        ``loader.fetch_articles_by_language("English", limit=10)``, or ``[]``
        when that call yields nothing or any step raises.
    """
    print("π§ͺ Testing Firebase Loading and Content Processing")
    print("=" * 60)
    try:
        # Imported inside the try so that an import failure is reported as a
        # failed test instead of aborting the whole script.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        # Test Firebase loader
        loader = FirebaseNewsLoader()
        print(f"β Firebase loader initialized - Project: {loader.project_id}")

        # Test with very small limit first
        print("\nπ Testing with small sample (5 articles)...")
        articles = loader.fetch_articles(limit=5)
        if articles:
            print(f"β Fetched {len(articles)} sample articles")

            # Analyze the first article
            first_article = articles[0]
            # A missing title/content is normalised to "" so that the slicing
            # and len() below cannot raise before the empty-content branch
            # further down gets a chance to report it.
            title = first_article.title or ""
            content = first_article.content or ""
            print("\nπ Sample Article Analysis:")
            print(f" Title: {title[:100]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {first_article.url}")
            print(f" Source: {first_article.source}")
            print(f" Article ID: {first_article.article_id}")

            # Show content preview
            if content:
                content_preview = content[:300].replace('\n', ' ')
                print(f" Content preview: {content_preview}...")
            else:
                print(" β οΈ No content found!")

        # Test language-specific fetching
        print("\nπ Testing English language filtering...")
        english_articles = loader.fetch_articles_by_language("English", limit=10)
        if english_articles:
            print(f"β Fetched {len(english_articles)} English articles")

            # Check content quality on (at most) the first three articles
            checked = english_articles[:3]
            valid_content = 0
            for article in checked:
                short_title = (article.title or "")[:50]
                body = article.content or ""
                if len(body) > 100:
                    valid_content += 1
                    print(f" π '{short_title}...' - {len(body)} chars")
                else:
                    print(f" β οΈ '{short_title}...' - insufficient content")
            print(f" Content quality: {valid_content}/{len(checked)} articles have substantial content")
        else:
            print("β No English articles found")

        # Normalise a None/empty result to a list so callers always get a
        # value that is safe to test for truthiness and to pass to len().
        return english_articles or []
    except Exception as e:
        print(f"β Firebase test failed: {e}")
        import traceback
        traceback.print_exc()
        return []
def test_vector_processing(articles, persist_directory="/tmp/test_vector_db"):
    """Store ``articles`` in a vector database and run one sample search.

    Progress is printed to stdout. Any exception (including a failed import of
    the retriever module) is caught, reported with a traceback, and turned
    into a ``False`` result.

    Args:
        articles: Articles to store; passed unchanged to
            ``VectorRetriever.store_articles_in_vector_db``.
        persist_directory: Directory handed to ``VectorRetriever``. Defaults
            to the location this script has always used; pass another path to
            run against a different or throw-away store.

    Returns:
        ``True`` when initialisation, storage and the search all completed
        without raising (even if the search returned no documents), otherwise
        ``False``.
    """
    print("\nπ Testing Vector Store Processing")
    print("=" * 60)
    try:
        # Imported inside the try so an import failure counts as a failed test.
        from cve_factchecker.retriever import VectorRetriever

        # Test vector retriever
        retriever = VectorRetriever(persist_directory=persist_directory)
        print("β Vector retriever initialized")

        # Test article storage
        print(f"\nπ¦ Testing storage of {len(articles)} articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)

        # Test retrieval
        print("\nπ Testing document retrieval...")
        test_query = "security vulnerability"
        results = retriever.search(test_query, k=3)
        if results:
            print(f"β Found {len(results)} relevant documents for '{test_query}'")
            for rank, doc in enumerate(results, start=1):
                content_preview = doc.page_content[:100].replace('\n', ' ')
                print(f" {rank}. {content_preview}...")
        else:
            # An empty result is reported but is not treated as a failure.
            print("β οΈ No documents found for test query")
        return True
    except Exception as e:
        print(f"β Vector processing test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_full_system(vector_dir="/tmp/test_system_vector"):
    """Run an end-to-end check: initialise the system, ingest, fact-check one claim.

    Progress is printed to stdout. Any exception (including a failed import of
    the orchestrator module) is caught, reported with a traceback, and turned
    into a ``False`` result.

    Args:
        vector_dir: Directory handed to ``FactCheckSystem``. Defaults to the
            location this script has always used.

    Returns:
        ``True`` when every step completed without raising, otherwise
        ``False``. An unsuccessful ingestion is reported but does not by
        itself fail the test.
    """
    print("\nπ Testing Full System Integration")
    print("=" * 60)
    try:
        # Imported inside the try so an import failure counts as a failed test.
        from cve_factchecker.orchestrator import FactCheckSystem

        # Test system initialization
        system = FactCheckSystem(vector_dir=vector_dir)
        print("β Fact check system initialized")

        # Test Firebase ingestion
        print("\nπ Testing Firebase ingestion...")
        result = system.ingest_firebase(limit=10)
        if result.get("success"):
            print(f"β Ingestion successful: {result.get('synced')} articles")
        else:
            print(f"β οΈ Ingestion issues: {result.get('error', 'Unknown error')}")

        # Test fact checking
        print("\nπ§ Testing fact checking...")
        test_claim = "A new security vulnerability was discovered in popular software"
        fact_result = system.fact_check(test_claim)
        print(f" Claim: {test_claim}")
        print(f" Verdict: {fact_result.get('verdict', 'Unknown')}")
        print(f" Confidence: {fact_result.get('confidence', 0)}")

        reasoning = fact_result.get('reasoning', 'No reasoning provided')
        # The key may be present but hold None or a non-string value; slicing
        # that directly would raise and wrongly fail an otherwise good run.
        if reasoning is None:
            reasoning = 'No reasoning provided'
        elif not isinstance(reasoning, str):
            reasoning = str(reasoning)
        print(f" Reasoning: {reasoning[:200]}...")
        return True
    except Exception as e:
        print(f"β Full system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run the three test stages in order and print a pass/fail summary.

    Returns:
        ``True`` only if Firebase loading produced articles and both the
        vector-processing and full-system stages reported success.
    """
    print("π§ͺ CVE Fact Checker - Comprehensive Test Suite")
    print("=" * 80)

    # Stage 1: Firebase loading
    articles = test_firebase_loading()

    # Stage 2: vector processing needs articles, so it is skipped without them
    if not articles:
        print("\nβ οΈ Skipping vector processing test - no articles loaded")
        vector_success = False
    else:
        vector_success = test_vector_processing(articles)

    # Stage 3: full system integration (runs regardless of earlier stages)
    system_success = test_full_system()

    # Summary, one line per stage
    print("\nπ Test Results Summary")
    print("=" * 80)
    stage_outcomes = (
        ("Firebase Loading", articles),
        ("Vector Processing", vector_success),
        ("System Integration", system_success),
    )
    for stage_name, outcome in stage_outcomes:
        print(f"{stage_name}: {'β PASS' if outcome else 'β FAIL'}")

    overall_success = bool(articles) and vector_success and system_success
    if overall_success:
        verdict = 'β ALL TESTS PASSED'
    else:
        verdict = 'β SOME TESTS FAILED'
    print(f"\nOverall Result: {verdict}")
    return overall_success
if __name__ == "__main__":
    # Run the suite and report its outcome through the process exit status:
    # 0 when every stage passed, 1 otherwise.
    success = main()
    sys.exit(1 if not success else 0)