elly99 committed on
Commit
6ab6730
·
verified ·
1 Parent(s): 742cc20

Create Scientific_Embedding.py

Browse files
Files changed (1) hide show
  1. src/models/Scientific_Embedding.py +22 -0
src/models/Scientific_Embedding.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # © 2025 Elena Marziali — Code released under Apache 2.0 license.
2
+ # See LICENSE in the repository for details.
3
+ # Removal of this copyright is prohibited.
4
+
5
+ # This section manages the system's memory, allowing efficient storage and
6
+ # retrieval of scientific content. Embeddings are generated using models
7
+ # specialized for academic texts.
8
+
9
def safe_encode(text):
    """Encode one text into an embedding, falling back to zeros on failure.

    Args:
        text: The string to embed. It must be a ``str`` containing at least
            one non-whitespace character.

    Returns:
        The result of ``embedding_model.encode([text])`` for the one-element
        batch. If that call raises, a ``(1, 768)`` ``float32`` array of zeros
        is returned instead, so callers always receive a value.

    Raises:
        ValueError: If ``text`` is not a string, or is empty or
            whitespace-only.
    """
    # Imported locally so the fallback below works even though no
    # module-level NumPy import is visible in this file.
    import numpy as np

    if not isinstance(text, str) or not text.strip():
        raise ValueError("Il testo da codificare è vuoto o non valido.")

    try:
        # `embedding_model` is a module-level name assigned further down in
        # this file; it is looked up at call time, not at definition time.
        return embedding_model.encode([text])
    except Exception as e:
        # Deliberate best-effort: report the failure and hand back a neutral
        # vector rather than propagating the error to the caller.
        print(f"Errore durante l'embedding: {e}")
        # NOTE(review): 768 presumably matches the loaded model's embedding
        # width — confirm if the model is ever swapped.
        return np.zeros((1, 768), dtype=np.float32)  # neutral fallback
17
+
18
+
19
# === Load the SPECTER model ===
# NOTE(review): `models` and `SentenceTransformer` must already be in scope
# (presumably from the sentence_transformers package); no import is visible
# in this view — confirm.

# Transformer backbone loaded from the "allenai/specter" checkpoint.
word_embedding_model = models.Transformer(model_name_or_path="allenai/specter")

# Pooling layer sized to the backbone's word-embedding dimension. No pooling
# mode is passed, so the library default applies.
# NOTE(review): confirm the default pooling is the intended strategy for
# SPECTER embeddings.
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension()
)

# Chain backbone and pooling into the encoder used by safe_encode().
embedding_model = SentenceTransformer(
    modules=[word_embedding_model, pooling_model]
)