NLPGenius committed on
Commit
1dd0906
·
1 Parent(s): c9eed71

Stability: remove gunicorn --preload, increase timeout, disable tokenizers parallelism, cap BLAS threads, lighten /health, configurable embeddings

Browse files
Dockerfile CHANGED
@@ -12,7 +12,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
12
  AUTO_INGEST=true \
13
  LANGUAGE_FILTER=English \
14
  HF_HOME=/tmp/huggingface \
15
- TRANSFORMERS_CACHE=/tmp/transformers
 
 
 
 
 
 
16
 
17
  # System deps for chromadb and sentence-transformers
18
  RUN apt-get update && apt-get install -y --no-install-recommends \
 
12
  AUTO_INGEST=true \
13
  LANGUAGE_FILTER=English \
14
  HF_HOME=/tmp/huggingface \
15
+ TRANSFORMERS_CACHE=/tmp/transformers \
16
+ TOKENIZERS_PARALLELISM=false \
17
+ OMP_NUM_THREADS=1 \
18
+ OPENBLAS_NUM_THREADS=1 \
19
+ MKL_NUM_THREADS=1 \
20
+ NUMEXPR_NUM_THREADS=1 \
21
+ HF_HUB_DISABLE_TELEMETRY=1
22
 
23
  # System deps for chromadb and sentence-transformers
24
  RUN apt-get update && apt-get install -y --no-install-recommends \
cve_factchecker/app.py CHANGED
@@ -283,22 +283,25 @@ def health() -> Any:
283
  "ingestion_status": INGEST_STATUS.copy()
284
  }
285
 
286
- # Check if we have data in the vector store
287
  try:
288
  if system is None:
289
  _safe_initialize_system()
290
 
291
  if system:
292
- # Try a quick search to see if we have data
293
- test_results = system.retriever.semantic_search("test", k=1)
294
- health_data["vector_store_populated"] = len(test_results) > 0
295
- health_data["sample_documents"] = len(test_results)
 
 
 
296
 
297
  # If no data and ingestion hasn't finished, provide more info
298
- if len(test_results) == 0 and not INGEST_STATUS.get("finished"):
299
  health_data["status"] = "initializing"
300
  health_data["message"] = "Vector store empty, ingestion in progress"
301
- elif len(test_results) == 0 and INGEST_STATUS.get("finished"):
302
  health_data["status"] = "warning"
303
  health_data["message"] = "Vector store empty after ingestion completion"
304
 
 
283
  "ingestion_status": INGEST_STATUS.copy()
284
  }
285
 
286
+ # Check if we have data in the vector store (lightweight)
287
  try:
288
  if system is None:
289
  _safe_initialize_system()
290
 
291
  if system:
292
+ vector_count = 0
293
+ try:
294
+ vector_count = system.retriever.get_vector_count()
295
+ except Exception as _:
296
+ vector_count = 0
297
+ health_data["vector_store_populated"] = vector_count > 0
298
+ health_data["vector_count"] = vector_count
299
 
300
  # If no data and ingestion hasn't finished, provide more info
301
+ if vector_count == 0 and not INGEST_STATUS.get("finished"):
302
  health_data["status"] = "initializing"
303
  health_data["message"] = "Vector store empty, ingestion in progress"
304
+ elif vector_count == 0 and INGEST_STATUS.get("finished"):
305
  health_data["status"] = "warning"
306
  health_data["message"] = "Vector store empty after ingestion completion"
307
 
cve_factchecker/embeddings.py CHANGED
@@ -38,9 +38,10 @@ def build_embeddings() -> Any:
38
 
39
  # Set environment variable for sentence-transformers cache
40
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
41
-
 
42
  return HuggingFaceEmbeddings(
43
- model_name="sentence-transformers/all-MiniLM-L6-v2",
44
  model_kwargs={"device": "cpu"},
45
  encode_kwargs={"normalize_embeddings": True},
46
  cache_folder=cache_dir,
 
38
 
39
  # Set environment variable for sentence-transformers cache
40
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
41
+ # Allow model to be overridden (and a smaller model to be used) via env
42
+ model_name = os.environ.get("EMBEDDINGS_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
43
  return HuggingFaceEmbeddings(
44
+ model_name=model_name,
45
  model_kwargs={"device": "cpu"},
46
  encode_kwargs={"normalize_embeddings": True},
47
  cache_folder=cache_dir,
cve_factchecker/retriever.py CHANGED
@@ -206,6 +206,8 @@ class VectorNewsRetriever:
206
  print(f"✅ Stored {len(docs)} chunks from {len(articles)} articles")
207
  def semantic_search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
208
  try:
 
 
209
  docs = self.vector_store.similarity_search(query, k=k)
210
  except Exception as e:
211
  print(f"❌ Vector search failed: {e}")
 
206
  print(f"✅ Stored {len(docs)} chunks from {len(articles)} articles")
207
  def semantic_search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
208
  try:
209
+ # Guardrails on k to avoid heavy loads
210
+ k = max(1, min(int(k or 5), 10))
211
  docs = self.vector_store.similarity_search(query, k=k)
212
  except Exception as e:
213
  print(f"❌ Vector search failed: {e}")
run_production.py CHANGED
@@ -63,8 +63,7 @@ def start_application():
63
  "-k", "gthread",
64
  "--threads", "4",
65
  "-b", f"0.0.0.0:{env.get('PORT', '7860')}",
66
- "--timeout", "120",
67
- "--preload", # Preload app for better memory usage
68
  "--access-logfile", "-", # Log to stdout
69
  "--error-logfile", "-", # Log to stderr
70
  "cve_factchecker.wsgi:application"
 
63
  "-k", "gthread",
64
  "--threads", "4",
65
  "-b", f"0.0.0.0:{env.get('PORT', '7860')}",
66
+ "--timeout", "180",
 
67
  "--access-logfile", "-", # Log to stdout
68
  "--error-logfile", "-", # Log to stderr
69
  "cve_factchecker.wsgi:application"
start_production.py CHANGED
@@ -41,8 +41,7 @@ def start_production_server():
41
  "-k", "gthread",
42
  "--threads", "4",
43
  "-b", f"0.0.0.0:{os.environ.get('PORT', '7860')}",
44
- "--timeout", "120",
45
- "--preload",
46
  "--access-logfile", "-",
47
  "--error-logfile", "-",
48
  "cve_factchecker.wsgi:application"
 
41
  "-k", "gthread",
42
  "--threads", "4",
43
  "-b", f"0.0.0.0:{os.environ.get('PORT', '7860')}",
44
+ "--timeout", "180",
 
45
  "--access-logfile", "-",
46
  "--error-logfile", "-",
47
  "cve_factchecker.wsgi:application"
startup.py CHANGED
@@ -45,6 +45,14 @@ def setup_environment():
45
  except Exception as e:
46
  print(f"❌ Could not set {env_var}: {e}")
47
 
 
 
 
 
 
 
 
 
48
 
49
  def check_permissions():
50
  """Check and report on directory permissions."""
 
45
  except Exception as e:
46
  print(f"❌ Could not set {env_var}: {e}")
47
 
48
+ # Ensure tokenizer libs don't attempt parallelism post-fork (prevents hangs)
49
+ os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')
50
+ # Cap thread usage for BLAS backends to avoid CPU thrashing on shared runners
51
+ os.environ.setdefault('OMP_NUM_THREADS', '1')
52
+ os.environ.setdefault('OPENBLAS_NUM_THREADS', '1')
53
+ os.environ.setdefault('MKL_NUM_THREADS', '1')
54
+ os.environ.setdefault('NUMEXPR_NUM_THREADS', '1')
55
+
56
 
57
  def check_permissions():
58
  """Check and report on directory permissions."""