metadata
base_model: intfloat/multilingual-e5-small
datasets: []
language: []
library_name: sentence-transformers
metrics:
- cosine_accuracy
- cosine_accuracy_threshold
- cosine_f1
- cosine_f1_threshold
- cosine_precision
- cosine_recall
- cosine_ap
- dot_accuracy
- dot_accuracy_threshold
- dot_f1
- dot_f1_threshold
- dot_precision
- dot_recall
- dot_ap
- manhattan_accuracy
- manhattan_accuracy_threshold
- manhattan_f1
- manhattan_f1_threshold
- manhattan_precision
- manhattan_recall
- manhattan_ap
- euclidean_accuracy
- euclidean_accuracy_threshold
- euclidean_f1
- euclidean_f1_threshold
- euclidean_precision
- euclidean_recall
- euclidean_ap
- max_accuracy
- max_accuracy_threshold
- max_f1
- max_f1_threshold
- max_precision
- max_recall
- max_ap
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:333
- loss:ContrastiveLoss
widget:
- source_sentence: What is the capital of Canada?
  sentences:
  - Main ingredient in guacamole
  - Prime Minister of the United Kingdom
  - What is the capital of Australia?
- source_sentence: What is the freezing point of water?
  sentences:
  - Paracetamol side effects
  - Temperature at which water freezes
  - Who discovered electricity?
- source_sentence: Who invented the telephone?
  sentences:
  - Positive effects of exercise
  - Current population of Japan
  - Who created the telephone?
- source_sentence: Who discovered gravity?
  sentences:
  - Steps to cook pasta
  - Who found out about gravity?
  - How to reset a password
- source_sentence: What is the capital of Italy?
  sentences:
  - What is water's chemical formula?
  - Italy's capital city
  - I need help with my homework
model-index:
- name: SentenceTransformer based on intfloat/multilingual-e5-small
results:
- task:
type: binary-classification
name: Binary Classification
dataset:
name: pair class dev
type: pair-class-dev
metrics:
- type: cosine_accuracy
value: 1
name: Cosine Accuracy
- type: cosine_accuracy_threshold
value: 0.8237255811691284
name: Cosine Accuracy Threshold
- type: cosine_f1
value: 1
name: Cosine F1
- type: cosine_f1_threshold
value: 0.8237255811691284
name: Cosine F1 Threshold
- type: cosine_precision
value: 1
name: Cosine Precision
- type: cosine_recall
value: 1
name: Cosine Recall
- type: cosine_ap
value: 1
name: Cosine Ap
- type: dot_accuracy
value: 1
name: Dot Accuracy
- type: dot_accuracy_threshold
value: 0.8237255215644836
name: Dot Accuracy Threshold
- type: dot_f1
value: 1
name: Dot F1
- type: dot_f1_threshold
value: 0.8237255215644836
name: Dot F1 Threshold
- type: dot_precision
value: 1
name: Dot Precision
- type: dot_recall
value: 1
name: Dot Recall
- type: dot_ap
value: 1
name: Dot Ap
- type: manhattan_accuracy
value: 0.972972972972973
name: Manhattan Accuracy
- type: manhattan_accuracy_threshold
value: 7.9234113693237305
name: Manhattan Accuracy Threshold
- type: manhattan_f1
value: 0.9795918367346939
name: Manhattan F1
- type: manhattan_f1_threshold
value: 9.902971267700195
name: Manhattan F1 Threshold
- type: manhattan_precision
value: 0.96
name: Manhattan Precision
- type: manhattan_recall
value: 1
name: Manhattan Recall
- type: manhattan_ap
value: 0.9983333333333333
name: Manhattan Ap
- type: euclidean_accuracy
value: 1
name: Euclidean Accuracy
- type: euclidean_accuracy_threshold
value: 0.5937579870223999
name: Euclidean Accuracy Threshold
- type: euclidean_f1
value: 1
name: Euclidean F1
- type: euclidean_f1_threshold
value: 0.5937579870223999
name: Euclidean F1 Threshold
- type: euclidean_precision
value: 1
name: Euclidean Precision
- type: euclidean_recall
value: 1
name: Euclidean Recall
- type: euclidean_ap
value: 1
name: Euclidean Ap
- type: max_accuracy
value: 1
name: Max Accuracy
- type: max_accuracy_threshold
value: 7.9234113693237305
name: Max Accuracy Threshold
- type: max_f1
value: 1
name: Max F1
- type: max_f1_threshold
value: 9.902971267700195
name: Max F1 Threshold
- type: max_precision
value: 1
name: Max Precision
- type: max_recall
value: 1
name: Max Recall
- type: max_ap
value: 1
name: Max Ap
- task:
type: binary-classification
name: Binary Classification
dataset:
name: pair class test
type: pair-class-test
metrics:
- type: cosine_accuracy
value: 1
name: Cosine Accuracy
- type: cosine_accuracy_threshold
value: 0.8052735328674316
name: Cosine Accuracy Threshold
- type: cosine_f1
value: 1
name: Cosine F1
- type: cosine_f1_threshold
value: 0.8052735328674316
name: Cosine F1 Threshold
- type: cosine_precision
value: 1
name: Cosine Precision
- type: cosine_recall
value: 1
name: Cosine Recall
- type: cosine_ap
value: 1
name: Cosine Ap
- type: dot_accuracy
value: 1
name: Dot Accuracy
- type: dot_accuracy_threshold
value: 0.8052735328674316
name: Dot Accuracy Threshold
- type: dot_f1
value: 1
name: Dot F1
- type: dot_f1_threshold
value: 0.8052735328674316
name: Dot F1 Threshold
- type: dot_precision
value: 1
name: Dot Precision
- type: dot_recall
value: 1
name: Dot Recall
- type: dot_ap
value: 1
name: Dot Ap
- type: manhattan_accuracy
value: 1
name: Manhattan Accuracy
- type: manhattan_accuracy_threshold
value: 9.779541969299316
name: Manhattan Accuracy Threshold
- type: manhattan_f1
value: 1
name: Manhattan F1
- type: manhattan_f1_threshold
value: 9.779541969299316
name: Manhattan F1 Threshold
- type: manhattan_precision
value: 1
name: Manhattan Precision
- type: manhattan_recall
value: 1
name: Manhattan Recall
- type: manhattan_ap
value: 1
name: Manhattan Ap
- type: euclidean_accuracy
value: 1
name: Euclidean Accuracy
- type: euclidean_accuracy_threshold
value: 0.6235698461532593
name: Euclidean Accuracy Threshold
- type: euclidean_f1
value: 1
name: Euclidean F1
- type: euclidean_f1_threshold
value: 0.6235698461532593
name: Euclidean F1 Threshold
- type: euclidean_precision
value: 1
name: Euclidean Precision
- type: euclidean_recall
value: 1
name: Euclidean Recall
- type: euclidean_ap
value: 1
name: Euclidean Ap
- type: max_accuracy
value: 1
name: Max Accuracy
- type: max_accuracy_threshold
value: 9.779541969299316
name: Max Accuracy Threshold
- type: max_f1
value: 1
name: Max F1
- type: max_f1_threshold
value: 9.779541969299316
name: Max F1 Threshold
- type: max_precision
value: 1
name: Max Precision
- type: max_recall
value: 1
name: Max Recall
- type: max_ap
value: 1
name: Max Ap
SentenceTransformer based on intfloat/multilingual-e5-small
This is a sentence-transformers model finetuned from intfloat/multilingual-e5-small. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
- Model Type: Sentence Transformer
- Base model: intfloat/multilingual-e5-small
- Maximum Sequence Length: 512 tokens
- Output Dimensionality: 384 dimensions
- Similarity Function: Cosine Similarity
Model Sources
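- Documentation: [Sentence Transformers Documentation](https://sbert.net)
- Repository: [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- Hugging Face: [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)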
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
(2): Normalize()
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("srikarvar/multilingual-e5-small-cogcache-contrastive")
# Run inference
sentences = [
'What is the capital of Italy?',
"Italy's capital city",
'I need help with my homework',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Evaluation
Metrics
Binary Classification
- Dataset: `pair-class-dev`
- Evaluated with `BinaryClassificationEvaluator`
| Metric | Value |
|---|---|
| cosine_accuracy | 1.0 |
| cosine_accuracy_threshold | 0.8237 |
| cosine_f1 | 1.0 |
| cosine_f1_threshold | 0.8237 |
| cosine_precision | 1.0 |
| cosine_recall | 1.0 |
| cosine_ap | 1.0 |
| dot_accuracy | 1.0 |
| dot_accuracy_threshold | 0.8237 |
| dot_f1 | 1.0 |
| dot_f1_threshold | 0.8237 |
| dot_precision | 1.0 |
| dot_recall | 1.0 |
| dot_ap | 1.0 |
| manhattan_accuracy | 0.973 |
| manhattan_accuracy_threshold | 7.9234 |
| manhattan_f1 | 0.9796 |
| manhattan_f1_threshold | 9.903 |
| manhattan_precision | 0.96 |
| manhattan_recall | 1.0 |
| manhattan_ap | 0.9983 |
| euclidean_accuracy | 1.0 |
| euclidean_accuracy_threshold | 0.5938 |
| euclidean_f1 | 1.0 |
| euclidean_f1_threshold | 0.5938 |
| euclidean_precision | 1.0 |
| euclidean_recall | 1.0 |
| euclidean_ap | 1.0 |
| max_accuracy | 1.0 |
| max_accuracy_threshold | 7.9234 |
| max_f1 | 1.0 |
| max_f1_threshold | 9.903 |
| max_precision | 1.0 |
| max_recall | 1.0 |
| max_ap | 1.0 |
Binary Classification
- Dataset: `pair-class-test`
- Evaluated with `BinaryClassificationEvaluator`
| Metric | Value |
|---|---|
| cosine_accuracy | 1.0 |
| cosine_accuracy_threshold | 0.8053 |
| cosine_f1 | 1.0 |
| cosine_f1_threshold | 0.8053 |
| cosine_precision | 1.0 |
| cosine_recall | 1.0 |
| cosine_ap | 1.0 |
| dot_accuracy | 1.0 |
| dot_accuracy_threshold | 0.8053 |
| dot_f1 | 1.0 |
| dot_f1_threshold | 0.8053 |
| dot_precision | 1.0 |
| dot_recall | 1.0 |
| dot_ap | 1.0 |
| manhattan_accuracy | 1.0 |
| manhattan_accuracy_threshold | 9.7795 |
| manhattan_f1 | 1.0 |
| manhattan_f1_threshold | 9.7795 |
| manhattan_precision | 1.0 |
| manhattan_recall | 1.0 |
| manhattan_ap | 1.0 |
| euclidean_accuracy | 1.0 |
| euclidean_accuracy_threshold | 0.6236 |
| euclidean_f1 | 1.0 |
| euclidean_f1_threshold | 0.6236 |
| euclidean_precision | 1.0 |
| euclidean_recall | 1.0 |
| euclidean_ap | 1.0 |
| max_accuracy | 1.0 |
| max_accuracy_threshold | 9.7795 |
| max_f1 | 1.0 |
| max_f1_threshold | 9.7795 |
| max_precision | 1.0 |
| max_recall | 1.0 |
| max_ap | 1.0 |
Training Details
Training Dataset
Unnamed Dataset
- Size: 333 training samples
- Columns: `sentence1`, `label`, and `sentence2`
- Approximate statistics based on the first 1000 samples:

|  | sentence1 | label | sentence2 |
|---|---|---|---|
| type | string | int | string |
| details | min: 6 tokens; mean: 10.25 tokens; max: 20 tokens | 0: ~51.65%; 1: ~48.35% | min: 4 tokens; mean: 9.42 tokens; max: 22 tokens |

- Samples:

| sentence1 | label | sentence2 |
|---|---|---|
| How to improve my credit score? | 1 | Improving my credit score tips |
| How does photosynthesis work? | 0 | What are the steps of photosynthesis? |
| What is the population of Germany? | 0 | How many people live in Berlin? |

- Loss: `ContrastiveLoss` with these parameters:

```json
{
    "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
    "margin": 0.5,
    "size_average": true
}
```
Evaluation Dataset
Unnamed Dataset
- Size: 37 evaluation samples
- Columns: `sentence1`, `label`, and `sentence2`
- Approximate statistics based on the first 1000 samples:

|  | sentence1 | label | sentence2 |
|---|---|---|---|
| type | string | int | string |
| details | min: 7 tokens; mean: 10.0 tokens; max: 13 tokens | 0: ~35.14%; 1: ~64.86% | min: 6 tokens; mean: 8.68 tokens; max: 12 tokens |

- Samples:

| sentence1 | label | sentence2 |
|---|---|---|
| What is the price of Bitcoin? | 1 | Bitcoin's current value |
| Who discovered gravity? | 1 | Who found out about gravity? |
| What is the most spoken language in the world? | 1 | Language spoken by the most people |

- Loss: `ContrastiveLoss` with these parameters:

```json
{
    "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
    "margin": 0.5,
    "size_average": true
}
```
Training Hyperparameters
Non-Default Hyperparameters
- `eval_strategy`: epoch
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `gradient_accumulation_steps`: 2
- `learning_rate`: 3e-05
- `weight_decay`: 0.01
- `num_train_epochs`: 5
- `lr_scheduler_type`: reduce_lr_on_plateau
- `warmup_ratio`: 0.1
- `load_best_model_at_end`: True
- `optim`: adamw_torch_fused
- `batch_sampler`: no_duplicates
All Hyperparameters
Click to expand
- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: epoch
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 2
- `eval_accumulation_steps`: None
- `learning_rate`: 3e-05
- `weight_decay`: 0.01
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 5
- `max_steps`: -1
- `lr_scheduler_type`: reduce_lr_on_plateau
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch_fused
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional
Training Logs
| Epoch | Step | Training Loss | loss | pair-class-dev_max_ap | pair-class-test_max_ap |
|---|---|---|---|---|---|
| 0 | 0 | - | - | 0.8544 | - |
| 0.9524 | 10 | 0.0318 | 0.0106 | 0.9935 | - |
| 1.9048 | 20 | 0.0126 | - | - | - |
| 2.0 | 21 | - | 0.0043 | 1.0 | - |
| 2.8571 | 30 | 0.008 | - | - | - |
| 2.9524 | 31 | - | 0.004 | 1.0 | - |
| 3.8095 | 40 | 0.0056 | - | - | - |
| 4.0 | 42 | - | 0.0040 | 1.0 | - |
| 4.7619 | 50 | 0.0039 | 0.0045 | 1.0 | 1.0 |
- The bold row denotes the saved checkpoint.
Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.41.2
- PyTorch: 2.1.2+cu121
- Accelerate: 0.32.1
- Datasets: 2.19.1
- Tokenizers: 0.19.1
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
ContrastiveLoss
@inproceedings{hadsell2006dimensionality,
author={Hadsell, R. and Chopra, S. and LeCun, Y.},
booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
title={Dimensionality Reduction by Learning an Invariant Mapping},
year={2006},
volume={2},
number={},
pages={1735-1742},
doi={10.1109/CVPR.2006.100}
}