Spaces:
Running
Running
Stability: remove gunicorn --preload, increase timeout, disable tokenizers parallelism, cap BLAS threads, lighten /health, configurable embeddings
Browse files
- Dockerfile +7 -1
- cve_factchecker/app.py +10 -7
- cve_factchecker/embeddings.py +3 -2
- cve_factchecker/retriever.py +2 -0
- run_production.py +1 -2
- start_production.py +1 -2
- startup.py +8 -0
Dockerfile
CHANGED
|
@@ -12,7 +12,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
|
| 12 |
AUTO_INGEST=true \
|
| 13 |
LANGUAGE_FILTER=English \
|
| 14 |
HF_HOME=/tmp/huggingface \
|
| 15 |
-
TRANSFORMERS_CACHE=/tmp/transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# System deps for chromadb and sentence-transformers
|
| 18 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
|
|
| 12 |
AUTO_INGEST=true \
|
| 13 |
LANGUAGE_FILTER=English \
|
| 14 |
HF_HOME=/tmp/huggingface \
|
| 15 |
+
TRANSFORMERS_CACHE=/tmp/transformers \
|
| 16 |
+
TOKENIZERS_PARALLELISM=false \
|
| 17 |
+
OMP_NUM_THREADS=1 \
|
| 18 |
+
OPENBLAS_NUM_THREADS=1 \
|
| 19 |
+
MKL_NUM_THREADS=1 \
|
| 20 |
+
NUMEXPR_NUM_THREADS=1 \
|
| 21 |
+
HF_HUB_DISABLE_TELEMETRY=1
|
| 22 |
|
| 23 |
# System deps for chromadb and sentence-transformers
|
| 24 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
cve_factchecker/app.py
CHANGED
|
@@ -283,22 +283,25 @@ def health() -> Any:
|
|
| 283 |
"ingestion_status": INGEST_STATUS.copy()
|
| 284 |
}
|
| 285 |
|
| 286 |
-
# Check if we have data in the vector store
|
| 287 |
try:
|
| 288 |
if system is None:
|
| 289 |
_safe_initialize_system()
|
| 290 |
|
| 291 |
if system:
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
# If no data and ingestion hasn't finished, provide more info
|
| 298 |
-
if
|
| 299 |
health_data["status"] = "initializing"
|
| 300 |
health_data["message"] = "Vector store empty, ingestion in progress"
|
| 301 |
-
elif
|
| 302 |
health_data["status"] = "warning"
|
| 303 |
health_data["message"] = "Vector store empty after ingestion completion"
|
| 304 |
|
|
|
|
| 283 |
"ingestion_status": INGEST_STATUS.copy()
|
| 284 |
}
|
| 285 |
|
| 286 |
+
# Check if we have data in the vector store (lightweight)
|
| 287 |
try:
|
| 288 |
if system is None:
|
| 289 |
_safe_initialize_system()
|
| 290 |
|
| 291 |
if system:
|
| 292 |
+
vector_count = 0
|
| 293 |
+
try:
|
| 294 |
+
vector_count = system.retriever.get_vector_count()
|
| 295 |
+
except Exception as _:
|
| 296 |
+
vector_count = 0
|
| 297 |
+
health_data["vector_store_populated"] = vector_count > 0
|
| 298 |
+
health_data["vector_count"] = vector_count
|
| 299 |
|
| 300 |
# If no data and ingestion hasn't finished, provide more info
|
| 301 |
+
if vector_count == 0 and not INGEST_STATUS.get("finished"):
|
| 302 |
health_data["status"] = "initializing"
|
| 303 |
health_data["message"] = "Vector store empty, ingestion in progress"
|
| 304 |
+
elif vector_count == 0 and INGEST_STATUS.get("finished"):
|
| 305 |
health_data["status"] = "warning"
|
| 306 |
health_data["message"] = "Vector store empty after ingestion completion"
|
| 307 |
|
cve_factchecker/embeddings.py
CHANGED
|
@@ -38,9 +38,10 @@ def build_embeddings() -> Any:
|
|
| 38 |
|
| 39 |
# Set environment variable for sentence-transformers cache
|
| 40 |
os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
|
| 41 |
-
|
|
|
|
| 42 |
return HuggingFaceEmbeddings(
|
| 43 |
-
model_name=
|
| 44 |
model_kwargs={"device": "cpu"},
|
| 45 |
encode_kwargs={"normalize_embeddings": True},
|
| 46 |
cache_folder=cache_dir,
|
|
|
|
| 38 |
|
| 39 |
# Set environment variable for sentence-transformers cache
|
| 40 |
os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
|
| 41 |
+
# Allow model to be overridden (and a smaller model to be used) via env
|
| 42 |
+
model_name = os.environ.get("EMBEDDINGS_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
| 43 |
return HuggingFaceEmbeddings(
|
| 44 |
+
model_name=model_name,
|
| 45 |
model_kwargs={"device": "cpu"},
|
| 46 |
encode_kwargs={"normalize_embeddings": True},
|
| 47 |
cache_folder=cache_dir,
|
cve_factchecker/retriever.py
CHANGED
|
@@ -206,6 +206,8 @@ class VectorNewsRetriever:
|
|
| 206 |
print(f"✅ Stored {len(docs)} chunks from {len(articles)} articles")
|
| 207 |
def semantic_search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
|
| 208 |
try:
|
|
|
|
|
|
|
| 209 |
docs = self.vector_store.similarity_search(query, k=k)
|
| 210 |
except Exception as e:
|
| 211 |
print(f"❌ Vector search failed: {e}")
|
|
|
|
| 206 |
print(f"✅ Stored {len(docs)} chunks from {len(articles)} articles")
|
| 207 |
def semantic_search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
|
| 208 |
try:
|
| 209 |
+
# Guardrails on k to avoid heavy loads
|
| 210 |
+
k = max(1, min(int(k or 5), 10))
|
| 211 |
docs = self.vector_store.similarity_search(query, k=k)
|
| 212 |
except Exception as e:
|
| 213 |
print(f"❌ Vector search failed: {e}")
|
run_production.py
CHANGED
|
@@ -63,8 +63,7 @@ def start_application():
|
|
| 63 |
"-k", "gthread",
|
| 64 |
"--threads", "4",
|
| 65 |
"-b", f"0.0.0.0:{env.get('PORT', '7860')}",
|
| 66 |
-
"--timeout", "
|
| 67 |
-
"--preload", # Preload app for better memory usage
|
| 68 |
"--access-logfile", "-", # Log to stdout
|
| 69 |
"--error-logfile", "-", # Log to stderr
|
| 70 |
"cve_factchecker.wsgi:application"
|
|
|
|
| 63 |
"-k", "gthread",
|
| 64 |
"--threads", "4",
|
| 65 |
"-b", f"0.0.0.0:{env.get('PORT', '7860')}",
|
| 66 |
+
"--timeout", "180",
|
|
|
|
| 67 |
"--access-logfile", "-", # Log to stdout
|
| 68 |
"--error-logfile", "-", # Log to stderr
|
| 69 |
"cve_factchecker.wsgi:application"
|
start_production.py
CHANGED
|
@@ -41,8 +41,7 @@ def start_production_server():
|
|
| 41 |
"-k", "gthread",
|
| 42 |
"--threads", "4",
|
| 43 |
"-b", f"0.0.0.0:{os.environ.get('PORT', '7860')}",
|
| 44 |
-
"--timeout", "
|
| 45 |
-
"--preload",
|
| 46 |
"--access-logfile", "-",
|
| 47 |
"--error-logfile", "-",
|
| 48 |
"cve_factchecker.wsgi:application"
|
|
|
|
| 41 |
"-k", "gthread",
|
| 42 |
"--threads", "4",
|
| 43 |
"-b", f"0.0.0.0:{os.environ.get('PORT', '7860')}",
|
| 44 |
+
"--timeout", "180",
|
|
|
|
| 45 |
"--access-logfile", "-",
|
| 46 |
"--error-logfile", "-",
|
| 47 |
"cve_factchecker.wsgi:application"
|
startup.py
CHANGED
|
@@ -45,6 +45,14 @@ def setup_environment():
|
|
| 45 |
except Exception as e:
|
| 46 |
print(f"β Could not set {env_var}: {e}")
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
def check_permissions():
|
| 50 |
"""Check and report on directory permissions."""
|
|
|
|
| 45 |
except Exception as e:
|
| 46 |
print(f"β Could not set {env_var}: {e}")
|
| 47 |
|
| 48 |
+
# Ensure tokenizer libs don't attempt parallelism post-fork (prevents hangs)
|
| 49 |
+
os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')
|
| 50 |
+
# Cap thread usage for BLAS backends to avoid CPU thrashing on shared runners
|
| 51 |
+
os.environ.setdefault('OMP_NUM_THREADS', '1')
|
| 52 |
+
os.environ.setdefault('OPENBLAS_NUM_THREADS', '1')
|
| 53 |
+
os.environ.setdefault('MKL_NUM_THREADS', '1')
|
| 54 |
+
os.environ.setdefault('NUMEXPR_NUM_THREADS', '1')
|
| 55 |
+
|
| 56 |
|
| 57 |
def check_permissions():
|
| 58 |
"""Check and report on directory permissions."""
|