File size: 5,318 Bytes
186fe46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python3
"""
Test the corrected English articles collection.
"""

import os
import sys

# Put the directory that contains this script at the front of the import
# search path, so imports are resolved against it first.
# NOTE(review): this is the script's own directory, not its parent.
current_dir = os.path.split(os.path.abspath(__file__))[0]
sys.path.insert(0, current_dir)

def test_english_articles():
    """Smoke-test fetching from the English articles collection.

    Creates a ``FirebaseNewsLoader``, prints its project ID and the
    configured English collection name, then calls
    ``fetch_english_articles`` with ``limit=10`` (printing up to three
    sample articles), ``limit=100`` and ``limit=None``.

    Returns:
        int: The number of articles returned by the ``limit=None`` fetch,
        or 0 when the first fetch returns nothing or any step raises.
        Exceptions are not propagated; they are printed together with a
        traceback.
    """
    print("🎯 Testing Corrected English Articles Collection")
    print("=" * 60)

    try:
        # Imported lazily so a missing or broken package is reported as a
        # failed test by the handler below instead of crashing at import time.
        from cve_factchecker.firebase_loader import FirebaseNewsLoader

        loader = FirebaseNewsLoader()

        print(f"📡 Project ID: {loader.project_id}")
        print(f"📚 English Collection: {loader.config.ENGLISH_ARTICLES_COLLECTION}")

        # Test fetching English articles
        print("\n📊 Fetching English articles...")
        english_articles = loader.fetch_english_articles(limit=10)

        print(f"✅ Successfully fetched {len(english_articles)} English articles")

        if not english_articles:
            print("❌ No English articles found")
            return 0

        print("\n📖 Sample articles:")
        for i, article in enumerate(english_articles[:3], 1):
            print(f"   {i}. {article.title[:80]}...")
            print(f"      📝 Content: {len(article.content)} chars")
            # language and url are optional: skip them when absent or empty.
            language = getattr(article, "language", None)
            if language:
                print(f"      🌐 Language: {language}")
            url = getattr(article, "url", None)
            if url:
                print(f"      🔗 URL: {url}")
            print()

        # Test larger fetch
        print("📊 Testing larger fetch (100 articles)...")
        large_batch = loader.fetch_english_articles(limit=100)
        print(f"✅ Large batch: {len(large_batch)} articles")

        # Test unlimited fetch
        print("📊 Testing unlimited fetch...")
        all_articles = loader.fetch_english_articles(limit=None)
        print(f"✅ All articles: {len(all_articles)} articles")

        return len(all_articles)

    except Exception as e:
        # Top-level boundary of this smoke test: report and signal "nothing
        # found" so main() can still print its summary.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return 0

def test_integration_with_system():
    """Smoke-test ingesting English articles through ``FactCheckSystem``.

    Creates a ``FactCheckSystem``, calls ``ingest_firebase`` for the
    ``"english_articles"`` collection with ``limit=20`` and
    ``language="English"``, prints the result, and then runs one
    ``semantic_search`` on the system's retriever as a best-effort check.

    Returns:
        bool: True when the ingestion result reports success and a
        ``synced`` count greater than zero. False when ingestion reports
        failure, nothing was synced, or any step raises. A failing vector
        search alone does not change the outcome. Exceptions are not
        propagated; they are printed together with a traceback.
    """
    print("\n🔧 Testing System Integration")
    print("=" * 60)

    try:
        # Imported lazily so a missing or broken package is reported as a
        # failed test by the handler below instead of crashing at import time.
        from cve_factchecker.orchestrator import FactCheckSystem

        system = FactCheckSystem()

        print("📊 Testing English articles ingestion...")
        result = system.ingest_firebase(
            collection="english_articles",  # Use English collection
            limit=20,  # Small test batch
            language="English",
        )

        print(f"✅ Ingestion result: {result}")

        if not result.get("success"):
            error = result.get("error", "Unknown error")
            print(f"❌ Ingestion failed: {error}")
            return False

        synced_count = result.get("synced", 0)
        print(f"📚 Articles synced: {synced_count}")

        # Check if vector database has chunks now. Only the search itself is
        # guarded, so unrelated errors are not mislabelled as search failures.
        try:
            test_results = system.retriever.semantic_search("test", k=1)
        except Exception as e:
            print(f"⚠️ Could not test vector search: {e}")
            # Still consider success if articles were synced
            return synced_count > 0

        print(f"🧩 Searchable chunks: {len(test_results)}")

        if synced_count > 0:
            print("✅ System integration successful!")
            return True

        print("⚠️ No articles were synced")
        return False

    except Exception as e:
        # Top-level boundary of this smoke test: report and signal failure.
        print(f"❌ Integration test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def main():
    """Run both smoke tests and print a summary.

    Calls ``test_english_articles`` first; ``test_integration_with_system``
    is only run when at least one article was found.

    Returns:
        bool: True when at least one English article was found. The
        integration result is reported in the summary but does not affect
        the return value.
    """
    print("🧪 CVE Fact Checker - English Articles Collection Test")
    print("=" * 80)

    # Test English articles collection
    article_count = test_english_articles()
    articles_found = article_count > 0

    # Test system integration only if we have articles
    integration_success = test_integration_with_system() if articles_found else False

    status_label = "✅ OK" if integration_success else "❌ Failed"
    print("\n📋 Test Summary:")
    print(f"   English articles found: {article_count}")
    print(f"   System integration: {status_label}")

    if articles_found and integration_success:
        print("\n🎉 Success! English articles collection is working correctly.")
        print("💡 You can now use the system to fetch English articles from Firebase.")
    elif articles_found:
        print("\n⚠️ Articles found but integration needs work.")
    else:
        print("\n❌ No English articles found. Check Firebase collection.")

    # The caller's exit status reflects article availability only; an
    # integration failure is reported above as a warning.
    return articles_found

if __name__ == "__main__":
    # Map main()'s truthy/falsy result to a process exit status:
    # 0 for success, 1 otherwise.
    success = main()
    exit_code = 0 if success else 1
    sys.exit(exit_code)