Feature Extraction
Transformers
PyTorch
e2d2
custom_code
File size: 1,613 Bytes
30e8556
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "T": 0,
  "architectures": [
    "E2D2"
  ],
  "attn_backend": "sdpa",
  "auto_map": {
    "AutoConfig": "diffusion.E2D2Config",
    "AutoModel": "diffusion.E2D2",
    "AutoModelForMaskedLM": "diffusion.E2D2"
  },
  "backbone_config": {
    "_target_": "backbone_encoder_decoder.LLMasEncoderDecoder",
    "attn_backend": "sdpa",
    "freeze_encoder": false,
    "hidden_size": 256,
    "intermediate_size": 768,
    "keep_top_decoder_layers": false,
    "keep_top_encoder_layers": false,
    "max_length": 1024,
    "num_decoder_layers": 8,
    "num_encoder_layers": 20,
    "pretrained_model_name_or_path": "Qwen/Qwen3-0.6B-Base",
    "reinit_decoder": true,
    "reinit_encoder": true,
    "tie_encoder_decoder_weights": false,
    "use_encoder_causal_mask": false,
    "use_gradient_checkpointing": false
  },
  "block_size": 8,
  "bos_token_id": 151643,
  "diffusion_type": "absorbing",
  "eos_token_id": 151643,
  "eval_block_size": 8,
  "keep_clean_bos": true,
  "length": 1024,
  "mask_token_id": 151660,
  "model_type": "e2d2",
  "noise_config": {
    "_target_": "noise_schedule_noise_schedules.LinearNoise"
  },
  "pad_token_id": 151643,
  "pad_vocab_size_multiple": 1,
  "shift_logits": false,
  "time_conditioned_backbone": false,
  "tokenization_config": {
    "bos_token_id": 151643,
    "eos_token_id": 151643,
    "mask_token_id": 151660,
    "pad_token_id": 151643,
    "pad_vocab_size_multiple": 1,
    "vocab_size": 151669
  },
  "tokenizer_name": "Qwen/Qwen3-0.6B-Base",
  "torch_dtype": "float32",
  "train_on_context": false,
  "transformers_version": "4.52.4",
  "vocab_size": 151669
}