Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from typing import List, Any | |
| import os | |
class SimpleDummyEmbeddings:
    """Dependency-free stand-in for a real embedding model.

    Each text is lower-cased, hashed, and the hash bits are used as a 0/1
    vector of length ``dimension`` that is then L2-normalised. The vectors
    carry no semantic meaning; they only provide a stable, correctly shaped
    output so the rest of the pipeline can run without a model.
    """

    def __init__(self, dim: int = 384):
        """Store the length of the vectors to produce.

        Args:
            dim: Number of components in every embedding vector.
        """
        self.dimension = dim

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed every text in *texts*.

        Args:
            texts: Strings to embed; matching is case-insensitive.

        Returns:
            One unit-length (or all-zero) vector of ``self.dimension``
            floats per input text, in input order.
        """
        # Local import keeps this class usable without touching the module's
        # import block.
        import hashlib

        dim = self.dimension
        # Bytes needed to supply one bit per component (never negative).
        n_bytes = max((dim + 7) // 8, 0)

        vecs: List[List[float]] = []
        for t in texts:
            # The built-in hash() is salted per process and only 64 bits wide,
            # so it is neither reproducible across runs nor able to fill more
            # than 64 components. SHAKE-256 is stable and yields as many bits
            # as the dimension requires.
            payload = t.lower().encode("utf-8", "surrogatepass")
            digest = hashlib.shake_256(payload).digest(n_bytes)
            v = [float((digest[i >> 3] >> (i & 7)) & 1) for i in range(dim)]
            # Fall back to 1.0 so an all-zero vector does not divide by zero.
            norm = sum(x * x for x in v) ** 0.5 or 1.0
            vecs.append([x / norm for x in v])
        return vecs

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string; same scheme as ``embed_documents``."""
        return self.embed_documents([text])[0]
def build_embeddings() -> Any:
    """Return an embeddings backend, preferring HuggingFace when available.

    Resolution order:
      1. ``SimpleDummyEmbeddings`` if the ``USE_DUMMY_EMBEDDINGS`` environment
         variable is "1", "true" or "yes" (case-insensitive).
      2. ``HuggingFaceEmbeddings`` from ``langchain_huggingface``, or failing
         that from ``langchain_community.embeddings``, loading
         ``sentence-transformers/all-MiniLM-L6-v2`` on CPU with normalised
         output.
      3. ``SimpleDummyEmbeddings`` if neither package imports or the model
         cannot be constructed.

    Side effects:
        Sets ``SENTENCE_TRANSFORMERS_HOME`` in ``os.environ`` when the
        HuggingFace path is attempted.

    Returns:
        An object exposing ``embed_documents`` / ``embed_query``.
    """
    # Allow forcing lightweight embeddings to speed up cold starts (e.g., on Spaces)
    if os.environ.get("USE_DUMMY_EMBEDDINGS", "").lower() in ("1", "true", "yes"):  # pragma: no cover
        return SimpleDummyEmbeddings()

    # Deliberately broad: optional third-party imports can fail with more than
    # ImportError, and any failure should just mean "use the fallback".
    try:
        from langchain_huggingface import HuggingFaceEmbeddings  # type: ignore
    except Exception:
        try:
            from langchain_community.embeddings import HuggingFaceEmbeddings  # type: ignore
        except Exception:
            HuggingFaceEmbeddings = None  # type: ignore

    # The name is bound on every path above, so a None check is sufficient.
    if HuggingFaceEmbeddings is not None:
        try:
            # Pick a cache directory that is actually writable: prefer /data
            # only when it is a directory this process can write to.
            cache_dir = "/tmp/sentence_transformers"
            if os.path.isdir("/data") and os.access("/data", os.W_OK):
                cache_dir = "/data/sentence_transformers"

            # Set environment variable for sentence-transformers cache
            os.environ["SENTENCE_TRANSFORMERS_HOME"] = cache_dir

            return HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
                cache_folder=cache_dir,
            )
        except Exception as e:
            # Best effort: report the problem and fall through to the dummy
            # backend instead of failing startup.
            print(f"⚠️ Could not load HuggingFace embeddings: {e}")
            print("🔄 Using dummy embeddings fallback")

    return SimpleDummyEmbeddings()