# Spaces: NLPGenius / CVE-FactChecker (status: Sleeping)
# File size: 2,160 Bytes

from __future__ import annotations

import hashlib
import os
from typing import List, Any

class SimpleDummyEmbeddings:
    """Dependency-free stand-in for a real sentence-embedding model.

    Every text is mapped to a unit-length vector of ``dimension`` floats whose
    components are derived from a digest of the lower-cased text. The vectors
    carry no semantic meaning; they only guarantee that texts which are equal
    ignoring case receive identical vectors.

    The digest comes from ``hashlib`` rather than the built-in ``hash()``:
    ``hash()`` of a string is salted per interpreter process, so vectors would
    differ between runs, and it provides only ~63 bits, which would leave every
    component past index 63 permanently zero.
    """

    def __init__(self, dim: int = 384):
        """Create an embedder producing vectors with ``dim`` components."""
        self.dimension = dim

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one normalised vector per entry of ``texts``, in order."""
        return [self._embed_one(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Return the vector for a single text.

        Delegates to ``embed_documents`` so a query and a document with the
        same text always map to the same vector.
        """
        return self.embed_documents([text])[0]

    def _embed_one(self, text: str) -> List[float]:
        """Map ``text`` to a deterministic 0/1 bit vector scaled to unit length."""
        dim = self.dimension
        # "surrogatepass" keeps lone surrogates encodable, so any ``str`` that
        # the built-in hash() accepted is still accepted here.
        payload = text.lower().encode("utf-8", "surrogatepass")
        # SHAKE-256 is an extendable-output function: a single call yields
        # enough bytes to supply one bit for every dimension.
        digest = hashlib.shake_256(payload).digest(max(0, (dim + 7) // 8))
        bits = [float((digest[i >> 3] >> (i & 7)) & 1) for i in range(dim)]
        # ``or 1.0`` avoids dividing by zero for an all-zero (or empty) vector.
        norm = sum(b * b for b in bits) ** 0.5 or 1.0
        return [b / norm for b in bits]

def build_embeddings() -> Any:
    """Return an embeddings object, preferring HuggingFace and falling back to dummies.

    A ``SimpleDummyEmbeddings`` instance is returned when:

    * the ``USE_DUMMY_EMBEDDINGS`` environment variable is ``1``, ``true`` or
      ``yes`` (case-insensitive);
    * ``HuggingFaceEmbeddings`` can be imported from neither
      ``langchain_huggingface`` nor ``langchain_community.embeddings``;
    * constructing ``HuggingFaceEmbeddings`` raises (the error is printed).

    Otherwise a ``HuggingFaceEmbeddings`` for
    ``sentence-transformers/all-MiniLM-L6-v2`` on CPU with normalised output
    is returned. As a side effect, ``SENTENCE_TRANSFORMERS_HOME`` is set in
    ``os.environ`` to the chosen cache directory before construction.
    """
    # Opt-in shortcut that skips the heavy model entirely (faster cold starts).
    dummy_flag = os.environ.get("USE_DUMMY_EMBEDDINGS", "").lower()
    if dummy_flag in ("1", "true", "yes"):  # pragma: no cover
        return SimpleDummyEmbeddings()

    # Try the dedicated package first, then the community package.
    hf_embeddings_cls = None
    try:
        from langchain_huggingface import HuggingFaceEmbeddings as hf_embeddings_cls  # type: ignore
    except Exception:
        try:
            from langchain_community.embeddings import HuggingFaceEmbeddings as hf_embeddings_cls  # type: ignore
        except Exception:
            hf_embeddings_cls = None

    if hf_embeddings_cls is None:
        return SimpleDummyEmbeddings()

    try:
        # Use /data for the model cache when that path exists, else /tmp.
        if os.path.exists("/data"):
            model_cache = "/data/sentence_transformers"
        else:
            model_cache = "/tmp/sentence_transformers"

        # Point sentence-transformers at the same cache directory.
        os.environ['SENTENCE_TRANSFORMERS_HOME'] = model_cache

        return hf_embeddings_cls(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
            cache_folder=model_cache,
        )
    except Exception as err:
        # Best-effort: any failure to load the model degrades to dummy vectors.
        print(f"⚠️ Could not load HuggingFace embeddings: {err}")
        print("🔄 Using dummy embeddings fallback")

    return SimpleDummyEmbeddings()