{ "A_init_range": [ 1, 16 ], "architectures": [ "DragonForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_dragon.DragonConfig", "AutoModel": "modeling_dragon.DragonModel", "AutoModelForCausalLM": "modeling_dragon.DragonForCausalLM" }, "bos_token_id": 0, "conv_kernel": 4, "dtype": "bfloat16", "eos_token_id": 0, "eot_token_id": 0, "expand_factor": 2, "hidden_dropout": 0.0, "hidden_size": 2048, "initializer_range": 0.006, "intermediate_size": 8192, "layers_config": "lrlrdlrlrlrlrdlrlrlrlrdlrlrlrlrdlrlr", "max_position_embeddings": 32768, "mlp_bias": false, "mlp_hidden_act": "relu2", "model_type": "dragon", "norm_epsilon": 1e-06, "num_attention_heads": 32, "num_hidden_layers": 36, "num_key_value_heads": 16, "num_logits_to_keep": 1, "old_lns": true, "pad_token_id": 0, "qk_norm": true, "residual_in_fp32": false, "rope_theta": 163, "scalable_softmax": true, "sliding_window_size": 1024, "slw_wsize": 32768, "softcap_global_attn": 150.0, "softcap_local_attn": 0.0, "tie_word_embeddings": false, "time_step_floor": 0.0001, "time_step_max": 0.1, "time_step_min": 0.001, "transformers_version": "4.56.1", "uscaling_tau": 0.2, "use_bias": false, "use_cache": true, "use_uscaling": false, "vocab_size": 196736 }