#!/usr/bin/env python3
"""Test script to verify the Firebase loading and content processing improvements.

Run as a script: the process exits with status 0 when ``main()`` reports that
every check passed and with status 1 otherwise.
"""

import os
import sys
import traceback

# Only the entry point is exported by ``from <module> import *``; the
# ``test_*`` functions are script-style checks that take plain arguments and
# return results to ``main()``.
__all__ = ["main"]

# Put this script's own directory (not its parent) first on the import path.
# NOTE(review): presumably this is where the ``cve_factchecker`` package
# lives - confirm against the repository layout.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)


def test_firebase_loading() -> list:
    """Test Firebase loading with language filtering.

    Fetches a 5-article sample and prints details of the first article, then
    fetches up to 10 articles for the language ``"English"`` and reports how
    many of the first three carry more than 100 characters of content.

    Returns:
        Whatever ``fetch_articles_by_language`` returned for English, or an
        empty list when any step raised (the error and traceback are printed
        instead of propagated).
    """
    print("🧪 Testing Firebase Loading and Content Processing")
    print("=" * 60)

    try:
        # Imported lazily so a missing package is reported as a failed check
        # rather than aborting the whole script at import time.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        # Test Firebase loader
        loader = FirebaseNewsLoader()
        print(f"✅ Firebase loader initialized - Project: {loader.project_id}")

        # Test with very small limit first
        print("\n🔍 Testing with small sample (5 articles)...")
        articles = loader.fetch_articles(limit=5)

        if articles:
            print(f"✅ Fetched {len(articles)} sample articles")

            # Analyze the first article
            first_article = articles[0]
            # Normalise missing content to "" so the length report below
            # cannot raise before the "No content found!" branch is reached.
            content = first_article.content or ""
            print("\n📋 Sample Article Analysis:")
            print(f" Title: {first_article.title[:100]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {first_article.url}")
            print(f" Source: {first_article.source}")
            print(f" Article ID: {first_article.article_id}")

            # Show content preview
            if content:
                content_preview = content[:300].replace("\n", " ")
                print(f" Content preview: {content_preview}...")
            else:
                print(" ⚠️ No content found!")

        # Test language-specific fetching
        print("\n🌍 Testing English language filtering...")
        english_articles = loader.fetch_articles_by_language("English", limit=10)

        if english_articles:
            print(f"✅ Fetched {len(english_articles)} English articles")

            # Check content quality on the first 3 articles only
            sample = english_articles[:3]
            valid_content = 0
            for article in sample:
                if article.content and len(article.content) > 100:
                    valid_content += 1
                    print(f" 📄 '{article.title[:50]}...' - {len(article.content)} chars")
                else:
                    print(f" ⚠️ '{article.title[:50]}...' - insufficient content")

            print(
                f" Content quality: {valid_content}/{len(sample)} "
                "articles have substantial content"
            )
        else:
            print("❌ No English articles found")

        return english_articles

    except Exception as e:
        # Top-level boundary of this check: report and signal failure.
        print(f"❌ Firebase test failed: {e}")
        traceback.print_exc()
        return []


def test_vector_processing(articles) -> bool:
    """Test vector store processing.

    Stores ``articles`` through ``VectorRetriever.store_articles_in_vector_db``
    (clearing the store first) and runs one search for
    ``"security vulnerability"`` with ``k=3``, printing a 100-character
    preview of each hit.

    Args:
        articles: Articles to store, as returned by ``test_firebase_loading``.

    Returns:
        ``True`` when storage and search completed without raising (an empty
        search result only prints a warning); ``False`` when any step raised.
    """
    print("\n🔍 Testing Vector Store Processing")
    print("=" * 60)

    try:
        from cve_factchecker.retriever import VectorRetriever

        # Test vector retriever
        retriever = VectorRetriever(persist_directory="/tmp/test_vector_db")
        print("✅ Vector retriever initialized")

        # Test article storage
        print(f"\n📦 Testing storage of {len(articles)} articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)

        # Test retrieval
        print("\n🔍 Testing document retrieval...")
        test_query = "security vulnerability"
        results = retriever.search(test_query, k=3)

        if results:
            print(f"✅ Found {len(results)} relevant documents for '{test_query}'")
            for position, doc in enumerate(results, start=1):
                content_preview = doc.page_content[:100].replace("\n", " ")
                print(f" {position}. {content_preview}...")
        else:
            print("⚠️ No documents found for test query")

        return True

    except Exception as e:
        print(f"❌ Vector processing test failed: {e}")
        traceback.print_exc()
        return False


def test_full_system() -> bool:
    """Test the complete system integration.

    Builds a ``FactCheckSystem``, ingests up to 10 Firebase articles and
    fact-checks one fixed claim, printing the verdict, confidence and the
    first 200 characters of the reasoning.

    Returns:
        ``True`` when every call completed without raising (an unsuccessful
        ingestion result only prints a warning); ``False`` when any step
        raised.
    """
    print("\n🚀 Testing Full System Integration")
    print("=" * 60)

    try:
        from cve_factchecker.orchestrator import FactCheckSystem

        # Test system initialization
        system = FactCheckSystem(vector_dir="/tmp/test_system_vector")
        print("✅ Fact check system initialized")

        # Test Firebase ingestion
        print("\n🔄 Testing Firebase ingestion...")
        result = system.ingest_firebase(limit=10)

        if result.get("success"):
            print(f"✅ Ingestion successful: {result.get('synced')} articles")
        else:
            print(f"⚠️ Ingestion issues: {result.get('error', 'Unknown error')}")

        # Test fact checking
        print("\n🧠 Testing fact checking...")
        test_claim = "A new security vulnerability was discovered in popular software"
        fact_result = system.fact_check(test_claim)

        print(f" Claim: {test_claim}")
        print(f" Verdict: {fact_result.get('verdict', 'Unknown')}")
        print(f" Confidence: {fact_result.get('confidence', 0)}")
        # ``or`` (not a .get default) so a present-but-None reasoning value
        # falls back to the placeholder instead of failing on the slice.
        reasoning = fact_result.get("reasoning") or "No reasoning provided"
        print(f" Reasoning: {reasoning[:200]}...")

        return True

    except Exception as e:
        print(f"❌ Full system test failed: {e}")
        traceback.print_exc()
        return False


def main() -> bool:
    """Run all tests.

    Runs the Firebase loading check, then the vector processing check (only
    when articles were loaded; otherwise it is skipped and counted as a
    failure), then the full system check, and prints a summary.

    Returns:
        ``True`` only when articles were loaded and both later checks
        returned ``True``.
    """
    print("🧪 CVE Fact Checker - Comprehensive Test Suite")
    print("=" * 80)

    # Test 1: Firebase loading
    articles = test_firebase_loading()

    # Test 2: Vector processing (if we have articles)
    if articles:
        vector_success = test_vector_processing(articles)
    else:
        print("\n⚠️ Skipping vector processing test - no articles loaded")
        vector_success = False

    # Test 3: Full system integration
    system_success = test_full_system()

    # Summary
    print("\n📊 Test Results Summary")
    print("=" * 80)
    print(f"Firebase Loading: {'✅ PASS' if articles else '❌ FAIL'}")
    print(f"Vector Processing: {'✅ PASS' if vector_success else '❌ FAIL'}")
    print(f"System Integration: {'✅ PASS' if system_success else '❌ FAIL'}")

    overall_success = bool(articles) and vector_success and system_success
    print(f"\nOverall Result: {'✅ ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'}")

    return overall_success


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)