Spaces:
Running
Running
Fix Firebase rate limiting infinite loop - progressive batch reduction and max retries
Browse files
- cve_factchecker/__pycache__/analyzer.cpython-311.pyc +0 -0
- cve_factchecker/__pycache__/embeddings.cpython-311.pyc +0 -0
- cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc +0 -0
- cve_factchecker/__pycache__/llm.cpython-311.pyc +0 -0
- cve_factchecker/__pycache__/models.cpython-311.pyc +0 -0
- cve_factchecker/__pycache__/orchestrator.cpython-311.pyc +0 -0
- cve_factchecker/__pycache__/retriever.cpython-311.pyc +0 -0
- cve_factchecker/app.py +2 -2
- cve_factchecker/firebase_loader.py +19 -5
cve_factchecker/__pycache__/analyzer.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/analyzer.cpython-311.pyc and b/cve_factchecker/__pycache__/analyzer.cpython-311.pyc differ
|
|
|
cve_factchecker/__pycache__/embeddings.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/embeddings.cpython-311.pyc and b/cve_factchecker/__pycache__/embeddings.cpython-311.pyc differ
|
|
|
cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc and b/cve_factchecker/__pycache__/firebase_loader.cpython-311.pyc differ
|
|
|
cve_factchecker/__pycache__/llm.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/llm.cpython-311.pyc and b/cve_factchecker/__pycache__/llm.cpython-311.pyc differ
|
|
|
cve_factchecker/__pycache__/models.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/models.cpython-311.pyc and b/cve_factchecker/__pycache__/models.cpython-311.pyc differ
|
|
|
cve_factchecker/__pycache__/orchestrator.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/orchestrator.cpython-311.pyc and b/cve_factchecker/__pycache__/orchestrator.cpython-311.pyc differ
|
|
|
cve_factchecker/__pycache__/retriever.cpython-311.pyc
CHANGED
|
Binary files a/cve_factchecker/__pycache__/retriever.cpython-311.pyc and b/cve_factchecker/__pycache__/retriever.cpython-311.pyc differ
|
|
|
cve_factchecker/app.py
CHANGED
|
@@ -159,8 +159,8 @@ def _background_ingest() -> None:
|
|
| 159 |
|
| 160 |
for attempt in range(max_retries):
|
| 161 |
try:
|
| 162 |
-
# Use
|
| 163 |
-
limit = min(
|
| 164 |
|
| 165 |
# Use dedicated English articles collection for better results
|
| 166 |
if LANGUAGE_FILTER.lower() in ["english", "en"]:
|
|
|
|
| 159 |
|
| 160 |
for attempt in range(max_retries):
|
| 161 |
try:
|
| 162 |
+
# Use conservative limit to avoid rate limiting
|
| 163 |
+
limit = min(500, 5000) # Further reduced initial batch
|
| 164 |
|
| 165 |
# Use dedicated English articles collection for better results
|
| 166 |
if LANGUAGE_FILTER.lower() in ["english", "en"]:
|
cve_factchecker/firebase_loader.py
CHANGED
|
@@ -67,13 +67,20 @@ class FirebaseNewsLoader:
|
|
| 67 |
|
| 68 |
articles: List[NewsArticle] = []
|
| 69 |
page_token: Optional[str] = None
|
| 70 |
-
batch_size = min(
|
| 71 |
remaining = limit
|
|
|
|
|
|
|
| 72 |
|
| 73 |
while True:
|
| 74 |
if remaining is not None and remaining <= 0:
|
| 75 |
break
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
page_size = batch_size if remaining is None else min(batch_size, remaining)
|
| 78 |
params = {
|
| 79 |
"key": self.config.api_key,
|
|
@@ -87,11 +94,18 @@ class FirebaseNewsLoader:
|
|
| 87 |
resp = requests.get(base_url, params=params, timeout=30)
|
| 88 |
|
| 89 |
if resp.status_code == 429: # Rate limit
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
time.sleep(retry_after)
|
| 93 |
continue
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
print(f"β Failed to fetch English articles: {resp.status_code}")
|
| 96 |
if resp.status_code == 404:
|
| 97 |
print(f"π‘ Collection '{collection_name}' not found. Falling back to language filtering...")
|
|
@@ -541,7 +555,7 @@ class FirebaseNewsLoader:
|
|
| 541 |
print(f"β Firebase API rate limited: waiting {retry_after}s")
|
| 542 |
time.sleep(retry_after)
|
| 543 |
continue
|
| 544 |
-
|
| 545 |
print(f"β Firebase API failed: {resp.status_code}")
|
| 546 |
if resp.status_code >= 500: # Server error, might be temporary
|
| 547 |
time.sleep(5)
|
|
|
|
| 67 |
|
| 68 |
articles: List[NewsArticle] = []
|
| 69 |
page_token: Optional[str] = None
|
| 70 |
+
batch_size = min(100, limit or 100) # Start conservative
|
| 71 |
remaining = limit
|
| 72 |
+
consecutive_rate_limits = 0
|
| 73 |
+
max_rate_limit_retries = 5 # Prevent infinite loops
|
| 74 |
|
| 75 |
while True:
|
| 76 |
if remaining is not None and remaining <= 0:
|
| 77 |
break
|
| 78 |
|
| 79 |
+
# Progressive batch size reduction on rate limits
|
| 80 |
+
if consecutive_rate_limits > 2:
|
| 81 |
+
batch_size = max(10, batch_size // 2)
|
| 82 |
+
print(f"π½ Reducing batch size to {batch_size} due to rate limits")
|
| 83 |
+
|
| 84 |
page_size = batch_size if remaining is None else min(batch_size, remaining)
|
| 85 |
params = {
|
| 86 |
"key": self.config.api_key,
|
|
|
|
| 94 |
resp = requests.get(base_url, params=params, timeout=30)
|
| 95 |
|
| 96 |
if resp.status_code == 429: # Rate limit
|
| 97 |
+
consecutive_rate_limits += 1
|
| 98 |
+
if consecutive_rate_limits > max_rate_limit_retries:
|
| 99 |
+
print(f"β Too many consecutive rate limits ({consecutive_rate_limits}), stopping fetch")
|
| 100 |
+
break
|
| 101 |
+
|
| 102 |
+
retry_after = int(resp.headers.get('Retry-After', min(60, 10 * consecutive_rate_limits)))
|
| 103 |
+
print(f"β³ Rate limited #{consecutive_rate_limits}, waiting {retry_after}s...")
|
| 104 |
time.sleep(retry_after)
|
| 105 |
continue
|
| 106 |
+
else:
|
| 107 |
+
consecutive_rate_limits = 0 # Reset on success
|
| 108 |
+
if resp.status_code != 200:
|
| 109 |
print(f"β Failed to fetch English articles: {resp.status_code}")
|
| 110 |
if resp.status_code == 404:
|
| 111 |
print(f"π‘ Collection '{collection_name}' not found. Falling back to language filtering...")
|
|
|
|
| 555 |
print(f"β Firebase API rate limited: waiting {retry_after}s")
|
| 556 |
time.sleep(retry_after)
|
| 557 |
continue
|
| 558 |
+
if resp.status_code != 200:
|
| 559 |
print(f"β Firebase API failed: {resp.status_code}")
|
| 560 |
if resp.status_code >= 500: # Server error, might be temporary
|
| 561 |
time.sleep(5)
|