NLPGenius committed
Commit aa89b96 · 1 Parent(s): a7270f3

Fix Firebase rate limiting infinite loop - progressive batch reduction and max retries

cve_factchecker/__pycache__/analyzer.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/analyzer.cpython-311.pyc and b/cve_factchecker/__pycache__/analyzer.cpython-311.pyc differ
 
cve_factchecker/__pycache__/embeddings.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/embeddings.cpython-311.pyc and b/cve_factchecker/__pycache__/embeddings.cpython-311.pyc differ
 
cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc and b/cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc differ
 
cve_factchecker/__pycache__/llm.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/llm.cpython-311.pyc and b/cve_factchecker/__pycache__/llm.cpython-311.pyc differ
 
cve_factchecker/__pycache__/models.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/models.cpython-311.pyc and b/cve_factchecker/__pycache__/models.cpython-311.pyc differ
 
cve_factchecker/__pycache__/orchestrator.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/orchestrator.cpython-311.pyc and b/cve_factchecker/__pycache__/orchestrator.cpython-311.pyc differ
 
cve_factchecker/__pycache__/retriever.cpython-311.pyc CHANGED
Binary files a/cve_factchecker/__pycache__/retriever.cpython-311.pyc and b/cve_factchecker/__pycache__/retriever.cpython-311.pyc differ
 
cve_factchecker/app.py CHANGED
@@ -159,8 +159,8 @@ def _background_ingest() -> None:
 
     for attempt in range(max_retries):
         try:
-            # Use smaller limit to avoid rate limiting, prioritize English articles collection
-            limit = min(1000, 5000)  # Start with smaller batch
+            # Use conservative limit to avoid rate limiting
+            limit = min(500, 5000)  # Further reduced initial batch
 
             # Use dedicated English articles collection for better results
             if LANGUAGE_FILTER.lower() in ["english", "en"]:
cve_factchecker/firebase_loader.py CHANGED
@@ -67,13 +67,20 @@ class FirebaseNewsLoader:
 
         articles: List[NewsArticle] = []
         page_token: Optional[str] = None
-        batch_size = min(300, limit or 300)  # Firestore max pageSize
+        batch_size = min(100, limit or 100)  # Start conservative
        remaining = limit
+        consecutive_rate_limits = 0
+        max_rate_limit_retries = 5  # Prevent infinite loops
 
         while True:
             if remaining is not None and remaining <= 0:
                 break
 
+            # Progressive batch size reduction on rate limits
+            if consecutive_rate_limits > 2:
+                batch_size = max(10, batch_size // 2)
+                print(f"🔽 Reducing batch size to {batch_size} due to rate limits")
+
             page_size = batch_size if remaining is None else min(batch_size, remaining)
             params = {
                 "key": self.config.api_key,
@@ -87,11 +94,18 @@
             resp = requests.get(base_url, params=params, timeout=30)
 
             if resp.status_code == 429:  # Rate limit
-                retry_after = int(resp.headers.get('Retry-After', 30))
-                print(f"⏳ Rate limited, waiting {retry_after}s...")
+                consecutive_rate_limits += 1
+                if consecutive_rate_limits > max_rate_limit_retries:
+                    print(f"❌ Too many consecutive rate limits ({consecutive_rate_limits}), stopping fetch")
+                    break
+
+                retry_after = int(resp.headers.get('Retry-After', min(60, 10 * consecutive_rate_limits)))
+                print(f"⏳ Rate limited #{consecutive_rate_limits}, waiting {retry_after}s...")
                 time.sleep(retry_after)
                 continue
-            elif resp.status_code != 200:
+            else:
+                consecutive_rate_limits = 0  # Reset on success
+            if resp.status_code != 200:
                 print(f"❌ Failed to fetch English articles: {resp.status_code}")
                 if resp.status_code == 404:
                     print(f"💡 Collection '{collection_name}' not found. Falling back to language filtering...")
@@ -541,7 +555,7 @@
                 print(f"❌ Firebase API rate limited: waiting {retry_after}s")
                 time.sleep(retry_after)
                 continue
-            elif resp.status_code != 200:
+            if resp.status_code != 200:
                 print(f"❌ Firebase API failed: {resp.status_code}")
                 if resp.status_code >= 500:  # Server error, might be temporary
                     time.sleep(5)
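
For readers who want the pattern outside the diff context, below is a minimal, self-contained sketch of the fetch loop this commit converges on, assuming a Firestore-style REST endpoint that returns `documents` and `nextPageToken`. The function name `fetch_documents`, its parameters, and the response parsing are illustrative placeholders, not the project's actual FirebaseNewsLoader API.

import time
from typing import List, Optional

import requests


def fetch_documents(base_url: str, api_key: str, limit: Optional[int] = None) -> List[dict]:
    """Paginated fetch with bounded retries and progressive batch reduction on HTTP 429."""
    documents: List[dict] = []
    page_token: Optional[str] = None
    batch_size = min(100, limit or 100)      # start conservative
    remaining = limit
    consecutive_rate_limits = 0
    max_rate_limit_retries = 5               # hard stop instead of looping forever

    while True:
        if remaining is not None and remaining <= 0:
            break

        # Progressive batch reduction: after a few 429s in a row, halve the page size.
        if consecutive_rate_limits > 2:
            batch_size = max(10, batch_size // 2)

        page_size = batch_size if remaining is None else min(batch_size, remaining)
        params = {"key": api_key, "pageSize": page_size}
        if page_token:
            params["pageToken"] = page_token

        resp = requests.get(base_url, params=params, timeout=30)

        if resp.status_code == 429:          # rate limited
            consecutive_rate_limits += 1
            if consecutive_rate_limits > max_rate_limit_retries:
                break                        # give up rather than spin indefinitely
            # Honour Retry-After if present, otherwise back off progressively.
            retry_after = int(resp.headers.get("Retry-After",
                                               min(60, 10 * consecutive_rate_limits)))
            time.sleep(retry_after)
            continue
        else:
            consecutive_rate_limits = 0      # reset the counter on any non-429 response

        if resp.status_code != 200:
            break

        payload = resp.json()
        batch = payload.get("documents", [])
        documents.extend(batch)
        if remaining is not None:
            remaining -= len(batch)

        page_token = payload.get("nextPageToken")
        if not page_token:
            break

    return documents

The two guards work together: max_rate_limit_retries bounds how many consecutive 429 responses the loop will tolerate, while the progressive halving of batch_size lowers the cost of each request once throttling starts, so the loop either recovers or terminates instead of retrying forever.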