Spaces:
Sleeping
Sleeping
File size: 8,320 Bytes
186fe46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
#!/usr/bin/env python3
"""
Test script specifically for the English articles collection implementation.
"""
import os
import sys
# Put the directory containing this script at the front of sys.path.
# NOTE(review): presumably so the `cve_factchecker` package imported by the
# tests below resolves from here — confirm the package sits next to this file.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)
def test_english_articles_collection():
    """Fetch a sample from the dedicated English articles collection and report on it.

    Creates a ``FirebaseNewsLoader``, requests up to 20 articles through
    ``fetch_english_articles`` and prints details for the first five
    (title, content length, URL, source, language, content preview),
    followed by a short quality summary.

    Returns:
        The fetched articles, or an empty list when nothing was fetched or
        any step raised (the error and its traceback are printed).
    """
    # NOTE(review): the "π…" prefixes are emoji whose bytes were lost to a bad
    # re-encoding; they are kept verbatim because the original glyphs cannot
    # be recovered from this file. Restore them from the upstream source.
    print("π― Testing English Articles Collection Implementation")
    print("=" * 70)
    try:
        # Imported lazily so a missing/broken package is reported as a test
        # failure by the handler below instead of crashing at import time.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        loader = FirebaseNewsLoader()
        print(f"✅ Firebase loader initialized - Project: {loader.project_id}")
        print(f"π English articles collection: {loader.config.ENGLISH_ARTICLES_COLLECTION}")

        print("\nπ Testing dedicated English articles collection...")
        english_articles = loader.fetch_english_articles(limit=20)
        if not english_articles:
            print("❌ No articles fetched from English collection")
            return []

        print(f"✅ Successfully fetched {len(english_articles)} articles from English collection")

        # Only the first five articles are inspected in detail.
        sample = english_articles[:5]
        valid_articles = 0
        total_content_length = 0
        for idx, article in enumerate(sample, start=1):
            # Normalise missing fields up front: a None title/content must be
            # reported as insufficient, not abort the whole test.
            title = article.title or ""
            content = article.content or ""

            print(f"\nπ Article {idx}:")
            print(f" Title: {title[:80]}...")
            print(f" Content length: {len(content)} characters")
            print(f" URL: {article.url}")
            print(f" Source: {article.source}")
            print(f" Language: {getattr(article, 'language', 'not set')}")

            # An article counts as valid when it has more than 100 characters.
            if len(content) > 100:
                valid_articles += 1
                total_content_length += len(content)
                content_preview = content[:200].replace('\n', ' ')
                print(f" Preview: {content_preview}...")
            else:
                print(" ⚠️ Insufficient content!")

        print("\nπ Quality Analysis:")
        print(f" Valid articles: {valid_articles}/{len(sample)}")
        # max(..., 1) avoids dividing by zero when no article was valid.
        print(f" Average content length: {total_content_length // max(valid_articles, 1)} chars")
        return english_articles
    except Exception as e:
        # Top-level boundary of this test: report the failure and let the
        # remaining tests run.
        print(f"❌ English articles test failed: {e}")
        import traceback
        traceback.print_exc()
        return []
def test_vector_storage_with_english_articles(articles):
    """Store the given articles in a vector DB and run a few sample searches.

    Creates a ``VectorRetriever`` persisting to ``/tmp/test_english_vector``,
    stores ``articles`` with ``clear_first=True``, then runs the first three
    sample queries with ``k=3`` and prints up to two result previews each.

    Args:
        articles: Articles to store, as returned by
            ``test_english_articles_collection``.

    Returns:
        True when storage and all searches completed without raising,
        False otherwise (the error and its traceback are printed).
    """
    # NOTE(review): the "π…" prefixes are emoji whose bytes were lost to a bad
    # re-encoding; they are kept verbatim because the original glyphs cannot
    # be recovered from this file. Restore them from the upstream source.
    print("\nπ Testing Vector Storage with English Articles")
    print("=" * 70)
    try:
        # Imported lazily so an import failure is reported by the handler below.
        from cve_factchecker.retriever import VectorRetriever

        retriever = VectorRetriever(persist_directory="/tmp/test_english_vector")
        print("✅ Vector retriever initialized")

        print(f"\nπ¦ Testing storage of {len(articles)} English articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)

        print("\nπ Testing semantic search...")
        test_queries = [
            "security vulnerability",
            "cyber attack",
            "software bug",
            "data breach",
            "malware",
        ]
        # Only the first three queries are exercised.
        for query in test_queries[:3]:
            results = retriever.search(query, k=3)
            print(f" Query: '{query}' -> {len(results)} results")
            # Show at most two hits per query; an empty result prints nothing.
            for rank, doc in enumerate(results[:2], start=1):
                content_preview = doc.page_content[:100].replace('\n', ' ')
                print(f" {rank}. {content_preview}...")
        return True
    except Exception as e:
        # Top-level boundary of this test: report the failure and carry on.
        print(f"❌ Vector storage test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_complete_system_with_english_collection():
    """Run ingestion and fact checking end to end against the English collection.

    Creates a ``FactCheckSystem`` with ``vector_dir="/tmp/test_english_system"``,
    ingests up to 15 articles from the ``english_articles`` collection and
    prints the ingestion result. When ingestion reports success with at
    least one synced article, the first two sample claims are fact-checked
    and their verdicts printed.

    Returns:
        The ``success`` value of the ingestion result (False when absent),
        or False when any step raised (the error and its traceback are
        printed).
    """
    # NOTE(review): the "π…" prefixes are emoji whose bytes were lost to a bad
    # re-encoding; they are kept verbatim because the original glyphs cannot
    # be recovered from this file. Restore them from the upstream source.
    print("\nπ Testing Complete System with English Collection")
    print("=" * 70)
    try:
        # Imported lazily so an import failure is reported by the handler below.
        from cve_factchecker.orchestrator import FactCheckSystem

        system = FactCheckSystem(vector_dir="/tmp/test_english_system")
        print("✅ Fact check system initialized")

        print("\nπ Testing English articles ingestion...")
        result = system.ingest_firebase(
            collection="english_articles",
            limit=15,
            language="English",
        )
        print("π Ingestion result:")
        print(f" Success: {result.get('success', False)}")
        print(f" Articles synced: {result.get('synced', 0)}")
        print(f" Collection: {result.get('collection', 'unknown')}")
        print(f" Language: {result.get('language', 'unknown')}")
        print(f" Message: {result.get('message', 'none')}")
        if result.get('error'):
            print(f" Error: {result.get('error')}")

        # Fact checking only makes sense once something has been ingested.
        if result.get("success") and result.get("synced", 0) > 0:
            print("\nπ§ Testing fact checking with English articles...")
            test_claims = [
                "A new critical security vulnerability was discovered",
                "Malware attacks are increasing in frequency",
                "Software companies are improving their security measures",
            ]
            # Only the first two claims are exercised.
            for claim in test_claims[:2]:
                print(f"\n Testing claim: '{claim}'")
                fact_result = system.fact_check(claim)
                print(f" Verdict: {fact_result.get('verdict', 'Unknown')}")
                print(f" Confidence: {fact_result.get('confidence', 0)}")
                print(f" Sources used: {fact_result.get('sources_used', 0)}")
                reasoning = fact_result.get('reasoning', 'No reasoning provided')
                print(f" Reasoning: {reasoning[:150]}...")

        # The test outcome is the ingestion outcome; fact-check verdicts are
        # printed for inspection but do not affect the result.
        return result.get("success", False)
    except Exception as e:
        # Top-level boundary of this test: report the failure and carry on.
        print(f"❌ Complete system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run all English articles tests and print a pass/fail summary.

    Runs the collection test, then the vector storage test (skipped and
    counted as failed when no articles were loaded), then the complete
    system test, which runs regardless of the earlier outcomes.

    Returns:
        True only when articles were fetched and both the vector storage
        and system integration tests succeeded; False otherwise.
    """
    # NOTE(review): the "π…" prefixes are emoji whose bytes were lost to a bad
    # re-encoding; they are kept verbatim because the original glyphs cannot
    # be recovered from this file. Restore them from the upstream source.

    def status(ok):
        """Return the summary marker for a passed or failed stage."""
        return '✅ PASS' if ok else '❌ FAIL'

    print("π― CVE Fact Checker - English Articles Collection Test Suite")
    print("=" * 80)

    # Test 1: English articles collection
    articles = test_english_articles_collection()

    # Test 2: Vector storage (needs the articles from test 1)
    if articles:
        vector_success = test_vector_storage_with_english_articles(articles)
    else:
        print("\n⚠️ Skipping vector storage test - no articles loaded")
        vector_success = False

    # Test 3: Complete system integration
    system_success = test_complete_system_with_english_collection()

    # Summary
    print("\nπ Test Results Summary")
    print("=" * 80)
    print(f"English Collection: {status(articles)} ({len(articles)} articles)")
    print(f"Vector Storage: {status(vector_success)}")
    print(f"System Integration: {status(system_success)}")

    overall_success = bool(articles) and vector_success and system_success
    print(f"\nOverall Result: {'✅ ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'}")
    if overall_success:
        print("\nπ English articles collection is working correctly!")
        print("π‘ The system is ready for production deployment.")
    else:
        print("\n⚠️ Some issues detected. Check the error messages above.")
    return overall_success
if __name__ == "__main__":
    # Exit status mirrors the suite outcome: 0 when main() reports success,
    # 1 otherwise.
    success = main()
    sys.exit(0 if success else 1)