tomaarsen HF Staff committed on
Commit
d9e59ba
·
verified ·
1 Parent(s): e187ea4

Uploading CrossEncoder model.

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - cross-encoder
5
+ - reranker
6
+ base_model: tomaarsen/Qwen3-Reranker-0.6B-seq-cls
7
+ pipeline_tag: text-ranking
8
+ library_name: sentence-transformers
9
+ ---
10
+
11
+ # CrossEncoder based on tomaarsen/Qwen3-Reranker-0.6B-seq-cls
12
+
13
+ This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [tomaarsen/Qwen3-Reranker-0.6B-seq-cls](https://huggingface.co/tomaarsen/Qwen3-Reranker-0.6B-seq-cls) using the [sentence-transformers](https://www.SBERT.net) library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.
14
+
15
+ ## Model Details
16
+
17
+ ### Model Description
18
+ - **Model Type:** Cross Encoder
19
+ - **Base model:** [tomaarsen/Qwen3-Reranker-0.6B-seq-cls](https://huggingface.co/tomaarsen/Qwen3-Reranker-0.6B-seq-cls) <!-- at revision 6a5829f5079c66e78d911e06fe21931cc00232f7 -->
20
+ - **Maximum Sequence Length:** 40960 tokens
21
+ - **Number of Output Labels:** 1 label
22
+ <!-- - **Training Dataset:** Unknown -->
23
+ <!-- - **Language:** Unknown -->
24
+ <!-- - **License:** Unknown -->
25
+
26
+ ### Model Sources
27
+
28
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
29
+ - **Documentation:** [Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
30
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
31
+ - **Hugging Face:** [Cross Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=cross-encoder)
32
+
33
+ ## Usage
34
+
35
+ ### Direct Usage (Sentence Transformers)
36
+
37
+ First install the Sentence Transformers library:
38
+
39
+ ```bash
40
+ pip install -U sentence-transformers
41
+ ```
42
+
43
+ Then you can load this model and run inference.
44
+ ```python
45
+ from sentence_transformers import CrossEncoder
46
+
47
+ # Download from the 🤗 Hub
48
+ model = CrossEncoder("cross-encoder-testing/Qwen3-Reranker-0.6B-seq-cls-v6")
49
+ # Get scores for pairs of texts
50
+ pairs = [
51
+ ['How many calories in an egg', 'There are on average between 55 and 80 calories in an egg depending on its size.'],
52
+ ['How many calories in an egg', 'Egg whites are very low in calories, have no fat, no cholesterol, and are loaded with protein.'],
53
+ ['How many calories in an egg', 'Most of the calories in an egg come from the yellow yolk in the center.'],
54
+ ]
55
+ scores = model.predict(pairs)
56
+ print(scores.shape)
57
+ # (3,)
58
+
59
+ # Or rank different texts based on similarity to a single text
60
+ ranks = model.rank(
61
+ 'How many calories in an egg',
62
+ [
63
+ 'There are on average between 55 and 80 calories in an egg depending on its size.',
64
+ 'Egg whites are very low in calories, have no fat, no cholesterol, and are loaded with protein.',
65
+ 'Most of the calories in an egg come from the yellow yolk in the center.',
66
+ ]
67
+ )
68
+ # [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
69
+ ```
70
+
71
+ <!--
72
+ ### Direct Usage (Transformers)
73
+
74
+ <details><summary>Click to see the direct usage in Transformers</summary>
75
+
76
+ </details>
77
+ -->
78
+
79
+ <!--
80
+ ### Downstream Usage (Sentence Transformers)
81
+
82
+ You can finetune this model on your own dataset.
83
+
84
+ <details><summary>Click to expand</summary>
85
+
86
+ </details>
87
+ -->
88
+
89
+ <!--
90
+ ### Out-of-Scope Use
91
+
92
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
93
+ -->
94
+
95
+ <!--
96
+ ## Bias, Risks and Limitations
97
+
98
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
99
+ -->
100
+
101
+ <!--
102
+ ### Recommendations
103
+
104
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
105
+ -->
106
+
107
+ ## Training Details
108
+
109
+ ### Framework Versions
110
+ - Python: 3.11.6
111
+ - Sentence Transformers: 5.3.0.dev0
112
+ - Transformers: 5.0.0rc1
113
+ - PyTorch: 2.9.1+cu126
114
+ - Accelerate: 1.6.0
115
+ - Datasets: 4.2.0
116
+ - Tokenizers: 0.22.1
117
+
118
+ ## Citation
119
+
120
+ ### BibTeX
121
+
122
+ <!--
123
+ ## Glossary
124
+
125
+ *Clearly define terms in order to be accessible across audiences.*
126
+ -->
127
+
128
+ <!--
129
+ ## Model Card Authors
130
+
131
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
132
+ -->
133
+
134
+ <!--
135
+ ## Model Card Contact
136
+
137
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
138
+ -->
chat_template.jinja ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <|im_start|>system
2
+ Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>
3
+ <|im_start|>user
4
+ <Instruct>: {{ messages[0]["content"] | default("Given a web search query, retrieve relevant passages that answer the query") }}
5
+ <Query>: {{ messages[1]["content"] }}
6
+ <Document>: {{ messages[2]["content"] }}<|im_end|>
7
+ <|im_start|>assistant
8
+ <think>
9
+
10
+ </think>
11
+
12
+
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForSequenceClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "float32",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 1024,
13
+ "id2label": {
14
+ "0": "LABEL_0"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "LABEL_0": 0
20
+ },
21
+ "layer_types": [
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention"
50
+ ],
51
+ "max_position_embeddings": 40960,
52
+ "max_window_layers": 28,
53
+ "model_type": "qwen3",
54
+ "num_attention_heads": 16,
55
+ "num_hidden_layers": 28,
56
+ "num_key_value_heads": 8,
57
+ "pad_token_id": 151643,
58
+ "rms_norm_eps": 1e-06,
59
+ "rope_parameters": {
60
+ "rope_theta": 1000000,
61
+ "rope_type": "default"
62
+ },
63
+ "sliding_window": null,
64
+ "tie_word_embeddings": true,
65
+ "transformers_version": "5.0.0rc1",
66
+ "use_cache": true,
67
+ "use_sliding_window": false,
68
+ "vocab_size": 151669
69
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "CrossEncoder",
3
+ "__version__": {
4
+ "sentence_transformers": "5.3.0.dev0",
5
+ "transformers": "5.0.0rc1",
6
+ "pytorch": "2.9.1+cu126"
7
+ },
8
+ "prompts": {
9
+ "web_search": "Given a web search query, retrieve relevant passages that answer the query"
10
+ },
11
+ "default_prompt_name": "web_search",
12
+ "activation_fn": "torch.nn.modules.activation.Sigmoid"
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b223e576dc70d8832372a538d4c8458dc25ce9daf209ac47cec4799c85e4da7
3
+ size 2383145520
modules.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.base.models.Transformer"
7
+ }
8
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_task": "sequence-classification",
3
+ "modality_config": {
4
+ "text": {
5
+ "method": "forward",
6
+ "method_output_name": "logits"
7
+ },
8
+ "message": {
9
+ "method": "forward",
10
+ "method_output_name": "logits"
11
+ }
12
+ },
13
+ "module_output_name": "scores"
14
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72e1b8509eea3ec6cc1a3226abd5205fbd17c559fea81dc4b70ed9100449833b
3
+ size 11422906
tokenizer_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "<|im_start|>",
5
+ "<|im_end|>",
6
+ "<|object_ref_start|>",
7
+ "<|object_ref_end|>",
8
+ "<|box_start|>",
9
+ "<|box_end|>",
10
+ "<|quad_start|>",
11
+ "<|quad_end|>",
12
+ "<|vision_start|>",
13
+ "<|vision_end|>",
14
+ "<|vision_pad|>",
15
+ "<|image_pad|>",
16
+ "<|video_pad|>"
17
+ ],
18
+ "backend": "tokenizers",
19
+ "bos_token": null,
20
+ "clean_up_tokenization_spaces": false,
21
+ "eos_token": "<|im_end|>",
22
+ "errors": "replace",
23
+ "extra_special_tokens": [
24
+ "<|im_start|>",
25
+ "<|im_end|>",
26
+ "<|object_ref_start|>",
27
+ "<|object_ref_end|>",
28
+ "<|box_start|>",
29
+ "<|box_end|>",
30
+ "<|quad_start|>",
31
+ "<|quad_end|>",
32
+ "<|vision_start|>",
33
+ "<|vision_end|>",
34
+ "<|vision_pad|>",
35
+ "<|image_pad|>",
36
+ "<|video_pad|>"
37
+ ],
38
+ "is_local": false,
39
+ "model_max_length": 40960,
40
+ "model_specific_special_tokens": {},
41
+ "pad_token": "<|endoftext|>",
42
+ "split_special_tokens": false,
43
+ "tokenizer_class": "Qwen2Tokenizer",
44
+ "unk_token": null
45
+ }