NLPGenius committed on
Commit
1dd0906
·
1 Parent(s): c9eed71

Stability: remove gunicorn --preload, increase timeout, disable tokenizers parallelism, cap BLAS threads, lighten /health, configurable embeddings

Browse files
Dockerfile CHANGED
@@ -12,7 +12,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
12
  AUTO_INGEST=true \
13
  LANGUAGE_FILTER=English \
14
  HF_HOME=/tmp/huggingface \
15
- TRANSFORMERS_CACHE=/tmp/transformers
 
 
 
 
 
 
16
 
17
  # System deps for chromadb and sentence-transformers
18
  RUN apt-get update && apt-get install -y --no-install-recommends \
 
12
  AUTO_INGEST=true \
13
  LANGUAGE_FILTER=English \
14
  HF_HOME=/tmp/huggingface \
15
+ TRANSFORMERS_CACHE=/tmp/transformers \
16
+ TOKENIZERS_PARALLELISM=false \
17
+ OMP_NUM_THREADS=1 \
18
+ OPENBLAS_NUM_THREADS=1 \
19
+ MKL_NUM_THREADS=1 \
20
+ NUMEXPR_NUM_THREADS=1 \
21
+ HF_HUB_DISABLE_TELEMETRY=1
22
 
23
  # System deps for chromadb and sentence-transformers
24
  RUN apt-get update && apt-get install -y --no-install-recommends \
cve_factchecker/app.py CHANGED
@@ -283,22 +283,25 @@ def health() -> Any:
283
  "ingestion_status": INGEST_STATUS.copy()
284
  }
285
 
286
- # Check if we have data in the vector store
287
  try:
288
  if system is None:
289
  _safe_initialize_system()
290
 
291
  if system:
292
- # Try a quick search to see if we have data
293
- test_results = system.retriever.semantic_search("test", k=1)
294
- health_data["vector_store_populated"] = len(test_results) > 0
295
- health_data["sample_documents"] = len(test_results)
 
 
 
296
 
297
  # If no data and ingestion hasn't finished, provide more info
298
- if len(test_results) == 0 and not INGEST_STATUS.get("finished"):
299
  health_data["status"] = "initializing"
300
  health_data["message"] = "Vector store empty, ingestion in progress"
301
- elif len(test_results) == 0 and INGEST_STATUS.get("finished"):
302
  health_data["status"] = "warning"
303
  health_data["message"] = "Vector store empty after ingestion completion"
304
 
 
283
  "ingestion_status": INGEST_STATUS.copy()
284
  }
285
 
286
+ # Check if we have data in the vector store (lightweight)
287
  try:
288
  if system is None:
289
  _safe_initialize_system()
290
 
291
  if system:
292
+ vector_count = 0
293
+ try:
294
+ vector_count = system.retriever.get_vector_count()
295
+ except Exception as _:
296
+ vector_count = 0
297
+ health_data["vector_store_populated"] = vector_count > 0
298
+ health_data["vector_count"] = vector_count
299
 
300
  # If no data and ingestion hasn't finished, provide more info
301
+ if vector_count == 0 and not INGEST_STATUS.get("finished"):
302
  health_data["status"] = "initializing"
303
  health_data["message"] = "Vector store empty, ingestion in progress"
304
+ elif vector_count == 0 and INGEST_STATUS.get("finished"):
305
  health_data["status"] = "warning"
306
  health_data["message"] = "Vector store empty after ingestion completion"
307
 
cve_factchecker/embeddings.py CHANGED
@@ -38,9 +38,10 @@ def build_embeddings() -> Any:
38
 
39
  # Set environment variable for sentence-transformers cache
40
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
41
-
 
42
  return HuggingFaceEmbeddings(
43
- model_name="sentence-transformers/all-MiniLM-L6-v2",
44
  model_kwargs={"device": "cpu"},
45
  encode_kwargs={"normalize_embeddings": True},
46
  cache_folder=cache_dir,
 
38
 
39
  # Set environment variable for sentence-transformers cache
40
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
41
+ # Allow model to be overridden (and a smaller model to be used) via env
42
+ model_name = os.environ.get("EMBEDDINGS_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
43
  return HuggingFaceEmbeddings(
44
+ model_name=model_name,
45
  model_kwargs={"device": "cpu"},
46
  encode_kwargs={"normalize_embeddings": True},
47
  cache_folder=cache_dir,
cve_factchecker/retriever.py CHANGED
@@ -206,6 +206,8 @@ class VectorNewsRetriever:
206
  print(f"✅ Stored {len(docs)} chunks from {len(articles)} articles")
207
  def semantic_search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
208
  try:
 
 
209
  docs = self.vector_store.similarity_search(query, k=k)
210
  except Exception as e:
211
  print(f"❌ Vector search failed: {e}")
 
206
  print(f"✅ Stored {len(docs)} chunks from {len(articles)} articles")
207
  def semantic_search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
208
  try:
209
+ # Guardrails on k to avoid heavy loads
210
+ k = max(1, min(int(k or 5), 10))
211
  docs = self.vector_store.similarity_search(query, k=k)
212
  except Exception as e:
213
  print(f"❌ Vector search failed: {e}")
run_production.py CHANGED
@@ -63,8 +63,7 @@ def start_application():
63
  "-k", "gthread",
64
  "--threads", "4",
65
  "-b", f"0.0.0.0:{env.get('PORT', '7860')}",
66
- "--timeout", "120",
67
- "--preload", # Preload app for better memory usage
68
  "--access-logfile", "-", # Log to stdout
69
  "--error-logfile", "-", # Log to stderr
70
  "cve_factchecker.wsgi:application"
 
63
  "-k", "gthread",
64
  "--threads", "4",
65
  "-b", f"0.0.0.0:{env.get('PORT', '7860')}",
66
+ "--timeout", "180",
 
67
  "--access-logfile", "-", # Log to stdout
68
  "--error-logfile", "-", # Log to stderr
69
  "cve_factchecker.wsgi:application"
start_production.py CHANGED
@@ -41,8 +41,7 @@ def start_production_server():
41
  "-k", "gthread",
42
  "--threads", "4",
43
  "-b", f"0.0.0.0:{os.environ.get('PORT', '7860')}",
44
- "--timeout", "120",
45
- "--preload",
46
  "--access-logfile", "-",
47
  "--error-logfile", "-",
48
  "cve_factchecker.wsgi:application"
 
41
  "-k", "gthread",
42
  "--threads", "4",
43
  "-b", f"0.0.0.0:{os.environ.get('PORT', '7860')}",
44
+ "--timeout", "180",
 
45
  "--access-logfile", "-",
46
  "--error-logfile", "-",
47
  "cve_factchecker.wsgi:application"
startup.py CHANGED
@@ -45,6 +45,14 @@ def setup_environment():
45
  except Exception as e:
46
  print(f"❌ Could not set {env_var}: {e}")
47
 
 
 
 
 
 
 
 
 
48
 
49
  def check_permissions():
50
  """Check and report on directory permissions."""
 
45
  except Exception as e:
46
  print(f"❌ Could not set {env_var}: {e}")
47
 
48
+ # Ensure tokenizer libs don't attempt parallelism post-fork (prevents hangs)
49
+ os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')
50
+ # Cap thread usage for BLAS backends to avoid CPU thrashing on shared runners
51
+ os.environ.setdefault('OMP_NUM_THREADS', '1')
52
+ os.environ.setdefault('OPENBLAS_NUM_THREADS', '1')
53
+ os.environ.setdefault('MKL_NUM_THREADS', '1')
54
+ os.environ.setdefault('NUMEXPR_NUM_THREADS', '1')
55
+
56
 
57
  def check_permissions():
58
  """Check and report on directory permissions."""