{ "attention_qkv_bias": false, "codebook_size": 2048, "dim": 768, "dropout": 0.1, "fast_attention_qkv_bias": false, "fast_dim": 768, "fast_head_dim": 64, "fast_intermediate_size": 3072, "fast_n_head": 12, "fast_n_local_heads": 4, "head_dim": 64, "initializer_range": 0.041666666666666664, "intermediate_size": 3072, "is_reward_model": false, "max_seq_len": 2048, "model_type": "dual_ar", "n_fast_layer": 4, "n_head": 12, "n_layer": 10, "n_local_heads": 4, "norm_eps": 1e-5, "num_codebooks": 8, "rope_base": 100000, "scale_codebook_embeddings": false, "share_codebook_embeddings": true, "tie_word_embeddings": true, "use_gradient_checkpointing": true, "vocab_size": 2368, "depthwise_wte": true, "depthwise_output": true, "duplicate_code_0": false }