CVE-FactChecker / test_improvements.py
NLPGenius's picture
Robust fixes for Firebase filtering and content processing
48cec82
#!/usr/bin/env python3
"""
Test script to verify the Firebase loading and content processing improvements.
"""
import os
import sys
# Put the directory containing this script (not its parent) at the front of
# sys.path. Presumably this is so the `cve_factchecker` package imported by the
# tests below resolves when the script is launched from another working
# directory -- confirm against the repository layout.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)
def test_firebase_loading():
    """Smoke-test the Firebase loader and print a report of what it returns.

    Two fetches are made through ``FirebaseNewsLoader``:

    * an unfiltered sample (``limit=5``) whose first article is printed in
      detail, and
    * an English-only fetch (``limit=10``) whose first three articles are
      checked for having more than 100 characters of content.

    Returns:
        The English articles (an empty list if none came back), or an empty
        list if any step raised. Failures are printed together with a
        traceback instead of being propagated to the caller.
    """
    print("🧪 Testing Firebase Loading and Content Processing")
    print("=" * 60)
    try:
        # Imported lazily so that a missing or broken package is reported as a
        # failed test rather than preventing this script from starting.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        loader = FirebaseNewsLoader()
        print(f"✅ Firebase loader initialized - Project: {loader.project_id}")

        # Start with a very small limit to keep the first round-trip cheap.
        print("\n🔍 Testing with small sample (5 articles)...")
        articles = loader.fetch_articles(limit=5)
        if articles:
            print(f"✅ Fetched {len(articles)} sample articles")

            first_article = articles[0]
            # Title/content may be missing; normalise to "" so that slicing
            # and len() below cannot raise and abort the whole test.
            title = first_article.title or ""
            content = first_article.content or ""

            print("\n📋 Sample Article Analysis:")
            print(f" Title: {title[:100]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {first_article.url}")
            print(f" Source: {first_article.source}")
            print(f" Article ID: {first_article.article_id}")

            if content:
                content_preview = content[:300].replace('\n', ' ')
                print(f" Content preview: {content_preview}...")
            else:
                print(" ⚠️ No content found!")

        print("\n🌐 Testing English language filtering...")
        english_articles = loader.fetch_articles_by_language("English", limit=10)
        if english_articles:
            print(f"✅ Fetched {len(english_articles)} English articles")

            # Only the first three articles are inspected for content quality.
            sample = english_articles[:3]
            valid_content = 0
            for article in sample:
                short_title = (article.title or "")[:50]
                body = article.content or ""
                if len(body) > 100:
                    valid_content += 1
                    print(f" 📄 '{short_title}...' - {len(body)} chars")
                else:
                    print(f" ⚠️ '{short_title}...' - insufficient content")
            print(f" Content quality: {valid_content}/{len(sample)} articles have substantial content")
        else:
            print("❌ No English articles found")

        # Always hand back a list so callers get the same type on every path.
        return english_articles or []
    except Exception as e:
        # Top-level boundary of this check: report the failure and signal it
        # through the empty return value.
        print(f"❌ Firebase test failed: {e}")
        import traceback
        traceback.print_exc()
        return []
def test_vector_processing(articles=None, persist_directory=None):
    """Store articles in a scratch vector DB and run one retrieval query.

    Args:
        articles: Articles to index. Defaults to ``None`` so that pytest,
            which collects ``test_*`` functions from this file, does not try
            to resolve a fixture named ``articles``. ``None`` or an empty
            sequence is reported and treated as a failure because there is
            nothing to store or search.
        persist_directory: Directory handed to ``VectorRetriever``. Defaults
            to ``test_vector_db`` inside the system temporary directory
            instead of a hard-coded ``/tmp`` path.

    Returns:
        ``True`` when storing and searching completed without raising (even
        if the query matched nothing), ``False`` otherwise.
    """
    print("\n🔍 Testing Vector Store Processing")
    print("=" * 60)
    if not articles:
        print("⚠️ No articles supplied - nothing to store or search")
        return False
    try:
        import tempfile
        # Imported lazily so that a missing or broken package is reported as a
        # failed test rather than preventing this script from starting.
        from cve_factchecker.retriever import VectorRetriever

        if persist_directory is None:
            persist_directory = os.path.join(tempfile.gettempdir(), "test_vector_db")
        retriever = VectorRetriever(persist_directory=persist_directory)
        print("✅ Vector retriever initialized")

        print(f"\n📦 Testing storage of {len(articles)} articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)

        print("\n🔍 Testing document retrieval...")
        test_query = "security vulnerability"
        results = retriever.search(test_query, k=3)
        if results:
            print(f"✅ Found {len(results)} relevant documents for '{test_query}'")
            for position, doc in enumerate(results, start=1):
                content_preview = doc.page_content[:100].replace('\n', ' ')
                print(f" {position}. {content_preview}...")
        else:
            # An empty result set is only a warning; the pipeline itself ran.
            print("⚠️ No documents found for test query")
        return True
    except Exception as e:
        # Top-level boundary of this check: report the failure and signal it
        # through the return value.
        print(f"❌ Vector processing test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_full_system(vector_dir=None):
    """Run ingestion plus one fact check through ``FactCheckSystem``.

    Args:
        vector_dir: Directory handed to ``FactCheckSystem``. Defaults to
            ``test_system_vector`` inside the system temporary directory
            instead of a hard-coded ``/tmp`` path.

    Returns:
        ``True`` when initialisation, ingestion and the fact check all
        completed without raising, ``False`` otherwise. An ingestion result
        without ``success`` set is printed as a warning and does not by
        itself turn the result into ``False``.
    """
    print("\n🚀 Testing Full System Integration")
    print("=" * 60)
    try:
        import tempfile
        # Imported lazily so that a missing or broken package is reported as a
        # failed test rather than preventing this script from starting.
        from cve_factchecker.orchestrator import FactCheckSystem

        if vector_dir is None:
            vector_dir = os.path.join(tempfile.gettempdir(), "test_system_vector")
        system = FactCheckSystem(vector_dir=vector_dir)
        print("✅ Fact check system initialized")

        print("\n🔄 Testing Firebase ingestion...")
        result = system.ingest_firebase(limit=10)
        if result.get("success"):
            print(f"✅ Ingestion successful: {result.get('synced')} articles")
        else:
            print(f"⚠️ Ingestion issues: {result.get('error', 'Unknown error')}")

        print("\n🧠 Testing fact checking...")
        test_claim = "A new security vulnerability was discovered in popular software"
        fact_result = system.fact_check(test_claim)
        print(f" Claim: {test_claim}")
        print(f" Verdict: {fact_result.get('verdict', 'Unknown')}")
        print(f" Confidence: {fact_result.get('confidence', 0)}")
        # `or` also covers a key that is present but None/empty, which would
        # otherwise make the slice below raise.
        reasoning = fact_result.get('reasoning') or 'No reasoning provided'
        print(f" Reasoning: {reasoning[:200]}...")
        return True
    except Exception as e:
        # Top-level boundary of this check: report the failure and signal it
        # through the return value.
        print(f"❌ Full system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run the three checks in order and print a pass/fail summary.

    The vector-processing check only runs when the Firebase check returned
    articles; otherwise it is skipped and counted as failed. The full-system
    check always runs.

    Returns:
        ``True`` only if Firebase loading returned articles and both the
        vector-processing and full-system checks succeeded.
    """
    print("🧪 CVE Fact Checker - Comprehensive Test Suite")
    print("=" * 80)

    # Test 1: Firebase loading
    articles = test_firebase_loading()

    # Test 2: Vector processing needs the articles fetched by test 1.
    if articles:
        vector_success = test_vector_processing(articles)
    else:
        print("\n⚠️ Skipping vector processing test - no articles loaded")
        vector_success = False

    # Test 3: Full system integration
    system_success = test_full_system()

    print("\n📊 Test Results Summary")
    print("=" * 80)
    print(f"Firebase Loading: {'✅ PASS' if articles else '❌ FAIL'}")
    print(f"Vector Processing: {'✅ PASS' if vector_success else '❌ FAIL'}")
    print(f"System Integration: {'✅ PASS' if system_success else '❌ FAIL'}")

    overall_success = bool(articles) and vector_success and system_success
    print(f"\nOverall Result: {'✅ ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'}")
    return overall_success
if __name__ == "__main__":
    # Script entry point: map the suite's boolean outcome onto the usual
    # process exit status (0 = everything passed, 1 = something failed).
    success = main()
    exit_status = int(not success)
    sys.exit(exit_status)