orionweller committed on
Commit
455ad07
·
verified ·
1 Parent(s): 2ea1271

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +16 -2
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_name_or_path": "ettin-decoder-32m",
3
  "architectures": [
4
- "ModernBERTDecoder"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
@@ -9,6 +9,7 @@
9
  "classifier_activation": "gelu",
10
  "classifier_bias": false,
11
  "classifier_dropout": 0.0,
 
12
  "cls_token_id": 50281,
13
  "decoder_bias": true,
14
  "deterministic_flash_attn": false,
@@ -46,5 +47,18 @@
46
  "eos_token": "[SEP]",
47
  "bos_token": "[CLS]",
48
  "tokenizer_class": "PreTrainedTokenizerFast",
49
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
 
1
  {
2
  "_name_or_path": "ettin-decoder-32m",
3
  "architectures": [
4
+ "ModernBertDecoderForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
 
9
  "classifier_activation": "gelu",
10
  "classifier_bias": false,
11
  "classifier_dropout": 0.0,
12
+ "classifier_pooling": "mean",
13
  "cls_token_id": 50281,
14
  "decoder_bias": true,
15
  "deterministic_flash_attn": false,
 
47
  "eos_token": "[SEP]",
48
  "bos_token": "[CLS]",
49
  "tokenizer_class": "PreTrainedTokenizerFast",
50
+ "unk_token": "[UNK]",
51
+ "layer_types": [
52
+ "full_attention",
53
+ "sliding_attention",
54
+ "sliding_attention",
55
+ "full_attention",
56
+ "sliding_attention",
57
+ "sliding_attention",
58
+ "full_attention",
59
+ "sliding_attention",
60
+ "sliding_attention",
61
+ "full_attention"
62
+ ],
63
+ "use_cache": true
64
  }