File size: 8,320 Bytes
186fe46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python3
"""
Test script specifically for the English articles collection implementation.
"""

import os
import sys

# Put the directory that contains this script at the front of sys.path so the
# `cve_factchecker` imports inside the test functions can be resolved from it.
# NOTE(review): an earlier version of this comment said "parent directory",
# but the code inserts this file's own directory — confirm that is where the
# cve_factchecker package actually lives.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)

def test_english_articles_collection():
    """Fetch articles from the English collection and print a quality report.

    Builds a ``FirebaseNewsLoader``, requests up to 20 articles through
    ``fetch_english_articles`` and prints details for the first five of them.
    An article counts as valid when its content is longer than 100 characters.

    Returns:
        The fetched articles, or an empty list when nothing was fetched or
        when any exception was raised (its traceback is printed).
    """
    print("🎯 Testing English Articles Collection Implementation")
    print("=" * 70)

    try:
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        # Test Firebase loader
        loader = FirebaseNewsLoader()
        print(f"βœ… Firebase loader initialized - Project: {loader.project_id}")
        print(f"πŸ“š English articles collection: {loader.config.ENGLISH_ARTICLES_COLLECTION}")

        # Test the dedicated English articles method
        print("\nπŸ” Testing dedicated English articles collection...")
        english_articles = loader.fetch_english_articles(limit=20)

        if not english_articles:
            print("❌ No articles fetched from English collection")
            return []

        print(f"βœ… Successfully fetched {len(english_articles)} articles from English collection")

        # Only the first five articles are inspected in detail.
        inspected = english_articles[:5]
        valid_articles = 0
        total_content_length = 0

        for position, article in enumerate(inspected, start=1):
            # A None title/content must not abort the whole run and throw away
            # the fetched batch; treat it as empty text so the article is
            # simply reported as having insufficient content.
            title = article.title or ""
            content = article.content or ""

            print(f"\nπŸ“„ Article {position}:")
            print(f"   Title: {title[:80]}...")
            print(f"   Content length: {len(content)} characters")
            print(f"   URL: {article.url}")
            print(f"   Source: {article.source}")
            print(f"   Language: {getattr(article, 'language', 'not set')}")

            if len(content) > 100:
                valid_articles += 1
                total_content_length += len(content)

                # Show content preview on a single line
                content_preview = content[:200].replace('\n', ' ')
                print(f"   Preview: {content_preview}...")
            else:
                print("   ⚠️ Insufficient content!")

        print("\nπŸ“Š Quality Analysis:")
        print(f"   Valid articles: {valid_articles}/{len(inspected)}")
        # max(..., 1) avoids a division by zero when no article was valid.
        print(f"   Average content length: {total_content_length // max(valid_articles, 1)} chars")

        return english_articles

    except Exception as e:
        print(f"❌ English articles test failed: {e}")
        import traceback
        traceback.print_exc()
        return []

def test_vector_storage_with_english_articles(articles):
    """Test vector storage with English articles."""
    print("\nπŸ” Testing Vector Storage with English Articles")
    print("=" * 70)
    
    try:
        from cve_factchecker.retriever import VectorRetriever
        
        # Test vector retriever
        retriever = VectorRetriever(persist_directory="/tmp/test_english_vector")
        print("βœ… Vector retriever initialized")
        
        # Test article storage
        print(f"\nπŸ“¦ Testing storage of {len(articles)} English articles...")
        retriever.store_articles_in_vector_db(articles, clear_first=True)
        
        # Test semantic search
        print("\nπŸ” Testing semantic search...")
        test_queries = [
            "security vulnerability",
            "cyber attack",
            "software bug",
            "data breach",
            "malware"
        ]
        
        for query in test_queries[:3]:  # Test first 3 queries
            results = retriever.search(query, k=3)
            print(f"   Query: '{query}' -> {len(results)} results")
            
            if results:
                for i, doc in enumerate(results[:2]):  # Show first 2
                    content_preview = doc.page_content[:100].replace('\n', ' ')
                    print(f"     {i+1}. {content_preview}...")
        
        return True
        
    except Exception as e:
        print(f"❌ Vector storage test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_complete_system_with_english_collection():
    """Exercise ingestion and fact checking through ``FactCheckSystem``.

    Ingests up to 15 documents from the ``english_articles`` collection,
    prints the ingestion summary and, when ingestion reports success with at
    least one synced article, runs two sample claims through ``fact_check``.

    Returns:
        The ``success`` entry of the ingestion result (``False`` when that
        key is absent), or ``False`` if any exception was raised (its
        traceback is printed).
    """
    print("\nπŸš€ Testing Complete System with English Collection")
    print("=" * 70)

    try:
        from cve_factchecker.orchestrator import FactCheckSystem

        # Bring up the system under test
        system = FactCheckSystem(vector_dir="/tmp/test_english_system")
        print("βœ… Fact check system initialized")

        # Ingest from the English collection
        print("\nπŸ”„ Testing English articles ingestion...")
        ingest_options = {
            "collection": "english_articles",
            "limit": 15,
            "language": "English",
        }
        result = system.ingest_firebase(**ingest_options)

        # (label, result key, value shown when the key is absent)
        summary_rows = (
            ("Success", 'success', False),
            ("Articles synced", 'synced', 0),
            ("Collection", 'collection', 'unknown'),
            ("Language", 'language', 'unknown'),
            ("Message", 'message', 'none'),
        )
        print("πŸ“Š Ingestion result:")
        for label, key, fallback in summary_rows:
            print(f"   {label}: {result.get(key, fallback)}")

        error = result.get('error')
        if error:
            print(f"   Error: {error}")

        # Fact checking only makes sense once something was ingested.
        has_ingested = result.get("success") and result.get("synced", 0) > 0
        if has_ingested:
            print("\n🧠 Testing fact checking with English articles...")
            test_claims = [
                "A new critical security vulnerability was discovered",
                "Malware attacks are increasing in frequency",
                "Software companies are improving their security measures"
            ]
            verdict_rows = (
                ("Verdict", 'verdict', 'Unknown'),
                ("Confidence", 'confidence', 0),
                ("Sources used", 'sources_used', 0),
            )

            # Only the first two claims are checked.
            for claim in test_claims[:2]:
                print(f"\n   Testing claim: '{claim}'")
                fact_result = system.fact_check(claim)

                for label, key, fallback in verdict_rows:
                    print(f"     {label}: {fact_result.get(key, fallback)}")

                reasoning = fact_result.get('reasoning', 'No reasoning provided')
                print(f"     Reasoning: {reasoning[:150]}...")

        return result.get("success", False)

    except Exception as e:
        print(f"❌ Complete system test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def main():
    """Run the three English-collection checks and print a summary.

    The vector storage check is skipped, and counted as failed, when the
    collection check returned no articles.

    Returns:
        The overall result, truthy only when articles were fetched and both
        the vector storage and system integration checks succeeded.
    """
    def status(ok):
        # Render one check outcome for the summary table.
        return 'βœ… PASS' if ok else '❌ FAIL'

    rule = "=" * 80

    print("🎯 CVE Fact Checker - English Articles Collection Test Suite")
    print(rule)

    # Check 1: fetch from the English articles collection
    articles = test_english_articles_collection()

    # Check 2: vector storage needs articles to work with
    if not articles:
        print("\n⚠️ Skipping vector storage test - no articles loaded")
        vector_success = False
    else:
        vector_success = test_vector_storage_with_english_articles(articles)

    # Check 3: end-to-end system integration
    system_success = test_complete_system_with_english_collection()

    # Summary
    print("\nπŸ“Š Test Results Summary")
    print(rule)
    print(f"English Collection: {status(articles)} ({len(articles)} articles)")
    print(f"Vector Storage: {status(vector_success)}")
    print(f"System Integration: {status(system_success)}")

    overall_success = bool(articles) and vector_success and system_success
    verdict = 'βœ… ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'
    print(f"\nOverall Result: {verdict}")

    if not overall_success:
        print("\n⚠️ Some issues detected. Check the error messages above.")
    else:
        print("\nπŸŽ‰ English articles collection is working correctly!")
        print("πŸ’‘ The system is ready for production deployment.")

    return overall_success

if __name__ == "__main__":
    # Map the overall result onto a process exit status: 0 on success, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)