CVE-FactChecker / test_english_collection.py
NLPGenius's picture
fix firebase issues
186fe46
raw
history blame
8.32 kB
#!/usr/bin/env python3
"""
Test script specifically for the English articles collection implementation.
"""
import os
import sys
# Put this script's own directory (not its parent) at the front of the module
# search path so that modules located next to the script are importable --
# presumably this is where the ``cve_factchecker`` package used by the tests
# below lives.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)
def test_english_articles_collection():
    """Fetch a sample from the dedicated English articles collection and report on it.

    Builds a ``FirebaseNewsLoader``, requests up to 20 articles through
    ``fetch_english_articles`` and prints title, content length, URL, source
    and language for the first five of them, followed by a short quality
    summary. An article counts as valid when its content is longer than
    100 characters.

    Returns:
        The fetched articles, or an empty list when nothing was fetched or
        when any step raised an exception (the traceback is printed).
    """
    print("🎯 Testing English Articles Collection Implementation")
    print("=" * 70)

    try:
        # Imported inside the try block so that a missing or broken package is
        # reported as a failed test instead of aborting the whole script.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        loader = FirebaseNewsLoader()
        print(f"✅ Firebase loader initialized - Project: {loader.project_id}")
        print(f"📚 English articles collection: {loader.config.ENGLISH_ARTICLES_COLLECTION}")

        print("\n🔍 Testing dedicated English articles collection...")
        english_articles = loader.fetch_english_articles(limit=20)

        if not english_articles:
            print("❌ No articles fetched from English collection")
            return []

        print(f"✅ Successfully fetched {len(english_articles)} articles from English collection")

        # Only the first five articles are inspected in detail.
        sample = english_articles[:5]
        valid_articles = 0
        total_content_length = 0

        for number, article in enumerate(sample, start=1):
            # A document with a missing title or body must be reported as
            # low quality, not abort the run: treat None as an empty string
            # before slicing or measuring it.
            title = article.title or ""
            content = article.content or ""

            print(f"\n📄 Article {number}:")
            print(f" Title: {title[:80]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {article.url}")
            print(f" Source: {article.source}")
            print(f" Language: {getattr(article, 'language', 'not set')}")

            if len(content) > 100:
                valid_articles += 1
                total_content_length += len(content)
                # Collapse newlines so the preview stays on one output line.
                content_preview = content[:200].replace('\n', ' ')
                print(f" Preview: {content_preview}...")
            else:
                print(" ⚠️ Insufficient content!")

        print("\n📊 Quality Analysis:")
        print(f" Valid articles: {valid_articles}/{len(sample)}")
        # max(..., 1) avoids a division by zero when no article qualified.
        print(f" Average content length: {total_content_length // max(valid_articles, 1)} chars")

        return english_articles

    except Exception as e:
        print(f"❌ English articles test failed: {e}")
        import traceback
        traceback.print_exc()
        return []
def test_vector_storage_with_english_articles(articles):
    """Store the given articles in a scratch vector database and run sample searches.

    Creates a ``VectorRetriever`` persisting to ``/tmp/test_english_vector``,
    stores ``articles`` with ``clear_first=True``, then issues three
    security-related queries and prints how many results each one returned,
    with a short preview of the first two hits.

    Args:
        articles: Article objects to index; passed unchanged to
            ``store_articles_in_vector_db``.

    Returns:
        True when every step completed without raising, False otherwise
        (the traceback is printed). The number of search hits is reported
        but does not influence the result.
    """
    print("\n🔍 Testing Vector Storage with English Articles")
    print("=" * 70)

    try:
        # Imported inside the try block so that an import problem is reported
        # as a failed test instead of aborting the whole script.
        from cve_factchecker.retriever import VectorRetriever

        retriever = VectorRetriever(persist_directory="/tmp/test_english_vector")
        print("✅ Vector retriever initialized")

        print(f"\n📦 Testing storage of {len(articles)} English articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)

        print("\n🔍 Testing semantic search...")
        test_queries = [
            "security vulnerability",
            "cyber attack",
            "software bug",
        ]

        for query in test_queries:
            results = retriever.search(query, k=3)
            print(f" Query: '{query}' -> {len(results)} results")

            # Preview at most the first two hits, flattened onto one line each.
            for rank, doc in enumerate(results[:2], start=1):
                content_preview = doc.page_content[:100].replace('\n', ' ')
                print(f" {rank}. {content_preview}...")

        return True

    except Exception as e:
        print(f"❌ Vector storage test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_complete_system_with_english_collection():
    """Run ingestion and fact checking end to end against the English collection.

    Creates a ``FactCheckSystem`` with its vector store in
    ``/tmp/test_english_system``, ingests up to 15 articles from the
    ``english_articles`` Firebase collection and prints the ingestion
    result. When ingestion succeeded and synced at least one article, two
    sample claims are fact-checked and their verdicts printed.

    Returns:
        The ``success`` value of the ingestion result (False when absent),
        or False when any step raised an exception (the traceback is
        printed).
    """
    print("\n🚀 Testing Complete System with English Collection")
    print("=" * 70)

    try:
        # Imported inside the try block so that an import problem is reported
        # as a failed test instead of aborting the whole script.
        from cve_factchecker.orchestrator import FactCheckSystem

        system = FactCheckSystem(vector_dir="/tmp/test_english_system")
        print("✅ Fact check system initialized")

        print("\n🔄 Testing English articles ingestion...")
        result = system.ingest_firebase(
            collection="english_articles",
            limit=15,
            language="English",
        )

        print("📊 Ingestion result:")
        print(f" Success: {result.get('success', False)}")
        print(f" Articles synced: {result.get('synced', 0)}")
        print(f" Collection: {result.get('collection', 'unknown')}")
        print(f" Language: {result.get('language', 'unknown')}")
        print(f" Message: {result.get('message', 'none')}")

        if result.get('error'):
            print(f" Error: {result.get('error')}")

        # Fact checking only makes sense once something has been indexed.
        if result.get("success") and result.get("synced", 0) > 0:
            print("\n🧠 Testing fact checking with English articles...")

            test_claims = [
                "A new critical security vulnerability was discovered",
                "Malware attacks are increasing in frequency",
                "Software companies are improving their security measures",
            ]

            for claim in test_claims[:2]:  # Only the first two claims are checked
                print(f"\n Testing claim: '{claim}'")
                fact_result = system.fact_check(claim)

                print(f" Verdict: {fact_result.get('verdict', 'Unknown')}")
                print(f" Confidence: {fact_result.get('confidence', 0)}")
                print(f" Sources used: {fact_result.get('sources_used', 0)}")

                # `or` also covers a result that carries reasoning=None, which
                # would otherwise raise on slicing and be reported as a
                # failure of the whole test.
                reasoning = fact_result.get('reasoning') or 'No reasoning provided'
                print(f" Reasoning: {reasoning[:150]}...")

        return result.get("success", False)

    except Exception as e:
        print(f"❌ Complete system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run all English articles tests and print a pass/fail summary.

    Runs the collection test first, the vector storage test only when
    articles were fetched, and the complete-system test unconditionally.

    Returns:
        True only when articles were fetched and both the vector storage
        and system integration tests reported success, otherwise False.
    """
    print("🎯 CVE Fact Checker - English Articles Collection Test Suite")
    print("=" * 80)

    # Test 1: English articles collection
    articles = test_english_articles_collection()

    # Test 2: Vector storage (needs the articles from test 1)
    if articles:
        vector_success = test_vector_storage_with_english_articles(articles)
    else:
        print("\n⚠️ Skipping vector storage test - no articles loaded")
        vector_success = False

    # Test 3: Complete system integration (independent of tests 1 and 2)
    system_success = test_complete_system_with_english_collection()

    # Summary
    print("\n📊 Test Results Summary")
    print("=" * 80)
    print(f"English Collection: {'✅ PASS' if articles else '❌ FAIL'} ({len(articles)} articles)")
    print(f"Vector Storage: {'✅ PASS' if vector_success else '❌ FAIL'}")
    print(f"System Integration: {'✅ PASS' if system_success else '❌ FAIL'}")

    # Normalised to a real bool: the integration test forwards whatever the
    # ingestion result stored under "success".
    overall_success = bool(articles and vector_success and system_success)
    print(f"\nOverall Result: {'✅ ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'}")

    if overall_success:
        print("\n🎉 English articles collection is working correctly!")
        print("💡 The system is ready for production deployment.")
    else:
        print("\n⚠️ Some issues detected. Check the error messages above.")

    return overall_success
if __name__ == "__main__":
    # Map the suite outcome onto a conventional process exit status:
    # 0 when every test passed, 1 otherwise.
    success = main()
    sys.exit(int(not success))