lazarevich commited on
Commit
7d219fc
·
1 Parent(s): afefc48

add missing MTP layers

Browse files
README.md CHANGED
@@ -33,7 +33,7 @@ Introducing **GLM-4.5-Air-REAP-82B-A12B**, a **memory-efficient compressed varia
33
  This model was created using **REAP (Router-weighted Expert Activation Pruning)**, a novel expert pruning method that selectively removes redundant experts while preserving the router's independent control over remaining experts. Key features include:
34
 
35
  - **Near-Lossless Performance**: Maintains almost identical accuracy on code generation, agentic coding, and function calling tasks compared to the full 480B model
36
- - **25% Memory Reduction**: Compressed from 480B to 363B parameters, significantly lowering deployment costs and memory requirements
37
  - **Preserved Capabilities**: Retains all core functionalities including code generation, agentic workflows, repository-scale understanding, and function calling
38
  - **Drop-in Compatibility**: Works with vanilla vLLM - no source modifications or custom patches required
39
  - **Optimized for Real-World Use**: Particularly effective for resource-constrained environments, local deployments, and academic research
 
33
  This model was created using **REAP (Router-weighted Expert Activation Pruning)**, a novel expert pruning method that selectively removes redundant experts while preserving the router's independent control over remaining experts. Key features include:
34
 
35
  - **Near-Lossless Performance**: Maintains almost identical accuracy on code generation, agentic coding, and function calling tasks compared to the full 106B model
36
+ - **25% Memory Reduction**: Compressed from 106B to 82B parameters, significantly lowering deployment costs and memory requirements
37
  - **Preserved Capabilities**: Retains all core functionalities including code generation, agentic workflows, repository-scale understanding, and function calling
38
  - **Drop-in Compatibility**: Works with vanilla vLLM - no source modifications or custom patches required
39
  - **Optimized for Real-World Use**: Particularly effective for resource-constrained environments, local deployments, and academic research
config.json CHANGED
@@ -40,4 +40,4 @@
40
  "use_cache": true,
41
  "use_qk_norm": false,
42
  "vocab_size": 151552
43
- }
 
40
  "use_cache": true,
41
  "use_qk_norm": false,
42
  "vocab_size": 151552
43
+ }
model-00033-of-00033.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7758776512d90f231992766c306fcb206dcd85f1d691004afe330b0bd7554e7f
3
- size 4195148160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5542b6a78d0d1675e472534632ff1d295e357990980d9e1696050d75301c5c
3
+ size 10320775392
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 81932181504,
4
- "total_size": 163864380288
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00033-of-00033.safetensors",
@@ -13608,6 +13608,314 @@
13608
  "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00033.safetensors",
13609
  "model.layers.9.self_attn.v_proj.bias": "model-00007-of-00033.safetensors",
13610
  "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00033.safetensors",
13611
- "model.norm.weight": "model-00033-of-00033.safetensors"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13612
  }
13613
- }
 
1
  {
2
  "metadata": {
3
  "total_parameters": 81932181504,
4
+ "total_size": 169991698312
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00033-of-00033.safetensors",
 
13608
  "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00033.safetensors",
13609
  "model.layers.9.self_attn.v_proj.bias": "model-00007-of-00033.safetensors",
13610
  "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00033.safetensors",
13611
+ "model.norm.weight": "model-00033-of-00033.safetensors",
13612
+ "model.layers.46.embed_tokens.weight": "model-00033-of-00033.safetensors",
13613
+ "model.layers.46.shared_head.head.weight": "model-00033-of-00033.safetensors",
13614
+ "model.layers.46.eh_proj.weight": "model-00033-of-00033.safetensors",
13615
+ "model.layers.46.enorm.weight": "model-00033-of-00033.safetensors",
13616
+ "model.layers.46.hnorm.weight": "model-00033-of-00033.safetensors",
13617
+ "model.layers.46.input_layernorm.weight": "model-00033-of-00033.safetensors",
13618
+ "model.layers.46.mlp.experts.0.down_proj.weight": "model-00033-of-00033.safetensors",
13619
+ "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00033-of-00033.safetensors",
13620
+ "model.layers.46.mlp.experts.0.up_proj.weight": "model-00033-of-00033.safetensors",
13621
+ "model.layers.46.mlp.experts.1.down_proj.weight": "model-00033-of-00033.safetensors",
13622
+ "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00033-of-00033.safetensors",
13623
+ "model.layers.46.mlp.experts.1.up_proj.weight": "model-00033-of-00033.safetensors",
13624
+ "model.layers.46.mlp.experts.10.down_proj.weight": "model-00033-of-00033.safetensors",
13625
+ "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00033-of-00033.safetensors",
13626
+ "model.layers.46.mlp.experts.10.up_proj.weight": "model-00033-of-00033.safetensors",
13627
+ "model.layers.46.mlp.experts.11.down_proj.weight": "model-00033-of-00033.safetensors",
13628
+ "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00033-of-00033.safetensors",
13629
+ "model.layers.46.mlp.experts.11.up_proj.weight": "model-00033-of-00033.safetensors",
13630
+ "model.layers.46.mlp.experts.12.down_proj.weight": "model-00033-of-00033.safetensors",
13631
+ "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00033-of-00033.safetensors",
13632
+ "model.layers.46.mlp.experts.12.up_proj.weight": "model-00033-of-00033.safetensors",
13633
+ "model.layers.46.mlp.experts.13.down_proj.weight": "model-00033-of-00033.safetensors",
13634
+ "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00033-of-00033.safetensors",
13635
+ "model.layers.46.mlp.experts.13.up_proj.weight": "model-00033-of-00033.safetensors",
13636
+ "model.layers.46.mlp.experts.14.down_proj.weight": "model-00033-of-00033.safetensors",
13637
+ "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00033-of-00033.safetensors",
13638
+ "model.layers.46.mlp.experts.14.up_proj.weight": "model-00033-of-00033.safetensors",
13639
+ "model.layers.46.mlp.experts.15.down_proj.weight": "model-00033-of-00033.safetensors",
13640
+ "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00033-of-00033.safetensors",
13641
+ "model.layers.46.mlp.experts.15.up_proj.weight": "model-00033-of-00033.safetensors",
13642
+ "model.layers.46.mlp.experts.16.down_proj.weight": "model-00033-of-00033.safetensors",
13643
+ "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00033-of-00033.safetensors",
13644
+ "model.layers.46.mlp.experts.16.up_proj.weight": "model-00033-of-00033.safetensors",
13645
+ "model.layers.46.mlp.experts.17.down_proj.weight": "model-00033-of-00033.safetensors",
13646
+ "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00033-of-00033.safetensors",
13647
+ "model.layers.46.mlp.experts.17.up_proj.weight": "model-00033-of-00033.safetensors",
13648
+ "model.layers.46.mlp.experts.18.down_proj.weight": "model-00033-of-00033.safetensors",
13649
+ "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00033-of-00033.safetensors",
13650
+ "model.layers.46.mlp.experts.18.up_proj.weight": "model-00033-of-00033.safetensors",
13651
+ "model.layers.46.mlp.experts.19.down_proj.weight": "model-00033-of-00033.safetensors",
13652
+ "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00033-of-00033.safetensors",
13653
+ "model.layers.46.mlp.experts.19.up_proj.weight": "model-00033-of-00033.safetensors",
13654
+ "model.layers.46.mlp.experts.2.down_proj.weight": "model-00033-of-00033.safetensors",
13655
+ "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00033-of-00033.safetensors",
13656
+ "model.layers.46.mlp.experts.2.up_proj.weight": "model-00033-of-00033.safetensors",
13657
+ "model.layers.46.mlp.experts.20.down_proj.weight": "model-00033-of-00033.safetensors",
13658
+ "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00033-of-00033.safetensors",
13659
+ "model.layers.46.mlp.experts.20.up_proj.weight": "model-00033-of-00033.safetensors",
13660
+ "model.layers.46.mlp.experts.21.down_proj.weight": "model-00033-of-00033.safetensors",
13661
+ "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00033-of-00033.safetensors",
13662
+ "model.layers.46.mlp.experts.21.up_proj.weight": "model-00033-of-00033.safetensors",
13663
+ "model.layers.46.mlp.experts.22.down_proj.weight": "model-00033-of-00033.safetensors",
13664
+ "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00033-of-00033.safetensors",
13665
+ "model.layers.46.mlp.experts.22.up_proj.weight": "model-00033-of-00033.safetensors",
13666
+ "model.layers.46.mlp.experts.23.down_proj.weight": "model-00033-of-00033.safetensors",
13667
+ "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00033-of-00033.safetensors",
13668
+ "model.layers.46.mlp.experts.23.up_proj.weight": "model-00033-of-00033.safetensors",
13669
+ "model.layers.46.mlp.experts.24.down_proj.weight": "model-00033-of-00033.safetensors",
13670
+ "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00033-of-00033.safetensors",
13671
+ "model.layers.46.mlp.experts.24.up_proj.weight": "model-00033-of-00033.safetensors",
13672
+ "model.layers.46.mlp.experts.25.down_proj.weight": "model-00033-of-00033.safetensors",
13673
+ "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00033-of-00033.safetensors",
13674
+ "model.layers.46.mlp.experts.25.up_proj.weight": "model-00033-of-00033.safetensors",
13675
+ "model.layers.46.mlp.experts.26.down_proj.weight": "model-00033-of-00033.safetensors",
13676
+ "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00033-of-00033.safetensors",
13677
+ "model.layers.46.mlp.experts.26.up_proj.weight": "model-00033-of-00033.safetensors",
13678
+ "model.layers.46.mlp.experts.27.down_proj.weight": "model-00033-of-00033.safetensors",
13679
+ "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00033-of-00033.safetensors",
13680
+ "model.layers.46.mlp.experts.27.up_proj.weight": "model-00033-of-00033.safetensors",
13681
+ "model.layers.46.mlp.experts.28.down_proj.weight": "model-00033-of-00033.safetensors",
13682
+ "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00033-of-00033.safetensors",
13683
+ "model.layers.46.mlp.experts.28.up_proj.weight": "model-00033-of-00033.safetensors",
13684
+ "model.layers.46.mlp.experts.29.down_proj.weight": "model-00033-of-00033.safetensors",
13685
+ "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00033-of-00033.safetensors",
13686
+ "model.layers.46.mlp.experts.29.up_proj.weight": "model-00033-of-00033.safetensors",
13687
+ "model.layers.46.mlp.experts.3.down_proj.weight": "model-00033-of-00033.safetensors",
13688
+ "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00033-of-00033.safetensors",
13689
+ "model.layers.46.mlp.experts.3.up_proj.weight": "model-00033-of-00033.safetensors",
13690
+ "model.layers.46.mlp.experts.30.down_proj.weight": "model-00033-of-00033.safetensors",
13691
+ "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00033-of-00033.safetensors",
13692
+ "model.layers.46.mlp.experts.30.up_proj.weight": "model-00033-of-00033.safetensors",
13693
+ "model.layers.46.mlp.experts.31.down_proj.weight": "model-00033-of-00033.safetensors",
13694
+ "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00033-of-00033.safetensors",
13695
+ "model.layers.46.mlp.experts.31.up_proj.weight": "model-00033-of-00033.safetensors",
13696
+ "model.layers.46.mlp.experts.32.down_proj.weight": "model-00033-of-00033.safetensors",
13697
+ "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00033-of-00033.safetensors",
13698
+ "model.layers.46.mlp.experts.32.up_proj.weight": "model-00033-of-00033.safetensors",
13699
+ "model.layers.46.mlp.experts.33.down_proj.weight": "model-00033-of-00033.safetensors",
13700
+ "model.layers.46.mlp.experts.33.gate_proj.weight": "model-00033-of-00033.safetensors",
13701
+ "model.layers.46.mlp.experts.33.up_proj.weight": "model-00033-of-00033.safetensors",
13702
+ "model.layers.46.mlp.experts.34.down_proj.weight": "model-00033-of-00033.safetensors",
13703
+ "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00033-of-00033.safetensors",
13704
+ "model.layers.46.mlp.experts.34.up_proj.weight": "model-00033-of-00033.safetensors",
13705
+ "model.layers.46.mlp.experts.35.down_proj.weight": "model-00033-of-00033.safetensors",
13706
+ "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00033-of-00033.safetensors",
13707
+ "model.layers.46.mlp.experts.35.up_proj.weight": "model-00033-of-00033.safetensors",
13708
+ "model.layers.46.mlp.experts.36.down_proj.weight": "model-00033-of-00033.safetensors",
13709
+ "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00033-of-00033.safetensors",
13710
+ "model.layers.46.mlp.experts.36.up_proj.weight": "model-00033-of-00033.safetensors",
13711
+ "model.layers.46.mlp.experts.37.down_proj.weight": "model-00033-of-00033.safetensors",
13712
+ "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00033-of-00033.safetensors",
13713
+ "model.layers.46.mlp.experts.37.up_proj.weight": "model-00033-of-00033.safetensors",
13714
+ "model.layers.46.mlp.experts.38.down_proj.weight": "model-00033-of-00033.safetensors",
13715
+ "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00033-of-00033.safetensors",
13716
+ "model.layers.46.mlp.experts.38.up_proj.weight": "model-00033-of-00033.safetensors",
13717
+ "model.layers.46.mlp.experts.39.down_proj.weight": "model-00033-of-00033.safetensors",
13718
+ "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00033-of-00033.safetensors",
13719
+ "model.layers.46.mlp.experts.39.up_proj.weight": "model-00033-of-00033.safetensors",
13720
+ "model.layers.46.mlp.experts.4.down_proj.weight": "model-00033-of-00033.safetensors",
13721
+ "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00033-of-00033.safetensors",
13722
+ "model.layers.46.mlp.experts.4.up_proj.weight": "model-00033-of-00033.safetensors",
13723
+ "model.layers.46.mlp.experts.40.down_proj.weight": "model-00033-of-00033.safetensors",
13724
+ "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00033-of-00033.safetensors",
13725
+ "model.layers.46.mlp.experts.40.up_proj.weight": "model-00033-of-00033.safetensors",
13726
+ "model.layers.46.mlp.experts.41.down_proj.weight": "model-00033-of-00033.safetensors",
13727
+ "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00033-of-00033.safetensors",
13728
+ "model.layers.46.mlp.experts.41.up_proj.weight": "model-00033-of-00033.safetensors",
13729
+ "model.layers.46.mlp.experts.42.down_proj.weight": "model-00033-of-00033.safetensors",
13730
+ "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00033-of-00033.safetensors",
13731
+ "model.layers.46.mlp.experts.42.up_proj.weight": "model-00033-of-00033.safetensors",
13732
+ "model.layers.46.mlp.experts.43.down_proj.weight": "model-00033-of-00033.safetensors",
13733
+ "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00033-of-00033.safetensors",
13734
+ "model.layers.46.mlp.experts.43.up_proj.weight": "model-00033-of-00033.safetensors",
13735
+ "model.layers.46.mlp.experts.44.down_proj.weight": "model-00033-of-00033.safetensors",
13736
+ "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00033-of-00033.safetensors",
13737
+ "model.layers.46.mlp.experts.44.up_proj.weight": "model-00033-of-00033.safetensors",
13738
+ "model.layers.46.mlp.experts.45.down_proj.weight": "model-00033-of-00033.safetensors",
13739
+ "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00033-of-00033.safetensors",
13740
+ "model.layers.46.mlp.experts.45.up_proj.weight": "model-00033-of-00033.safetensors",
13741
+ "model.layers.46.mlp.experts.46.down_proj.weight": "model-00033-of-00033.safetensors",
13742
+ "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00033-of-00033.safetensors",
13743
+ "model.layers.46.mlp.experts.46.up_proj.weight": "model-00033-of-00033.safetensors",
13744
+ "model.layers.46.mlp.experts.47.down_proj.weight": "model-00033-of-00033.safetensors",
13745
+ "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00033-of-00033.safetensors",
13746
+ "model.layers.46.mlp.experts.47.up_proj.weight": "model-00033-of-00033.safetensors",
13747
+ "model.layers.46.mlp.experts.48.down_proj.weight": "model-00033-of-00033.safetensors",
13748
+ "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00033-of-00033.safetensors",
13749
+ "model.layers.46.mlp.experts.48.up_proj.weight": "model-00033-of-00033.safetensors",
13750
+ "model.layers.46.mlp.experts.49.down_proj.weight": "model-00033-of-00033.safetensors",
13751
+ "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00033-of-00033.safetensors",
13752
+ "model.layers.46.mlp.experts.49.up_proj.weight": "model-00033-of-00033.safetensors",
13753
+ "model.layers.46.mlp.experts.5.down_proj.weight": "model-00033-of-00033.safetensors",
13754
+ "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00033-of-00033.safetensors",
13755
+ "model.layers.46.mlp.experts.5.up_proj.weight": "model-00033-of-00033.safetensors",
13756
+ "model.layers.46.mlp.experts.50.down_proj.weight": "model-00033-of-00033.safetensors",
13757
+ "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00033-of-00033.safetensors",
13758
+ "model.layers.46.mlp.experts.50.up_proj.weight": "model-00033-of-00033.safetensors",
13759
+ "model.layers.46.mlp.experts.51.down_proj.weight": "model-00033-of-00033.safetensors",
13760
+ "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00033-of-00033.safetensors",
13761
+ "model.layers.46.mlp.experts.51.up_proj.weight": "model-00033-of-00033.safetensors",
13762
+ "model.layers.46.mlp.experts.52.down_proj.weight": "model-00033-of-00033.safetensors",
13763
+ "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00033-of-00033.safetensors",
13764
+ "model.layers.46.mlp.experts.52.up_proj.weight": "model-00033-of-00033.safetensors",
13765
+ "model.layers.46.mlp.experts.53.down_proj.weight": "model-00033-of-00033.safetensors",
13766
+ "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00033-of-00033.safetensors",
13767
+ "model.layers.46.mlp.experts.53.up_proj.weight": "model-00033-of-00033.safetensors",
13768
+ "model.layers.46.mlp.experts.54.down_proj.weight": "model-00033-of-00033.safetensors",
13769
+ "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00033-of-00033.safetensors",
13770
+ "model.layers.46.mlp.experts.54.up_proj.weight": "model-00033-of-00033.safetensors",
13771
+ "model.layers.46.mlp.experts.55.down_proj.weight": "model-00033-of-00033.safetensors",
13772
+ "model.layers.46.mlp.experts.55.gate_proj.weight": "model-00033-of-00033.safetensors",
13773
+ "model.layers.46.mlp.experts.55.up_proj.weight": "model-00033-of-00033.safetensors",
13774
+ "model.layers.46.mlp.experts.56.down_proj.weight": "model-00033-of-00033.safetensors",
13775
+ "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00033-of-00033.safetensors",
13776
+ "model.layers.46.mlp.experts.56.up_proj.weight": "model-00033-of-00033.safetensors",
13777
+ "model.layers.46.mlp.experts.57.down_proj.weight": "model-00033-of-00033.safetensors",
13778
+ "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00033-of-00033.safetensors",
13779
+ "model.layers.46.mlp.experts.57.up_proj.weight": "model-00033-of-00033.safetensors",
13780
+ "model.layers.46.mlp.experts.58.down_proj.weight": "model-00033-of-00033.safetensors",
13781
+ "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00033-of-00033.safetensors",
13782
+ "model.layers.46.mlp.experts.58.up_proj.weight": "model-00033-of-00033.safetensors",
13783
+ "model.layers.46.mlp.experts.59.down_proj.weight": "model-00033-of-00033.safetensors",
13784
+ "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00033-of-00033.safetensors",
13785
+ "model.layers.46.mlp.experts.59.up_proj.weight": "model-00033-of-00033.safetensors",
13786
+ "model.layers.46.mlp.experts.6.down_proj.weight": "model-00033-of-00033.safetensors",
13787
+ "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00033-of-00033.safetensors",
13788
+ "model.layers.46.mlp.experts.6.up_proj.weight": "model-00033-of-00033.safetensors",
13789
+ "model.layers.46.mlp.experts.60.down_proj.weight": "model-00033-of-00033.safetensors",
13790
+ "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00033-of-00033.safetensors",
13791
+ "model.layers.46.mlp.experts.60.up_proj.weight": "model-00033-of-00033.safetensors",
13792
+ "model.layers.46.mlp.experts.61.down_proj.weight": "model-00033-of-00033.safetensors",
13793
+ "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00033-of-00033.safetensors",
13794
+ "model.layers.46.mlp.experts.61.up_proj.weight": "model-00033-of-00033.safetensors",
13795
+ "model.layers.46.mlp.experts.62.down_proj.weight": "model-00033-of-00033.safetensors",
13796
+ "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00033-of-00033.safetensors",
13797
+ "model.layers.46.mlp.experts.62.up_proj.weight": "model-00033-of-00033.safetensors",
13798
+ "model.layers.46.mlp.experts.63.down_proj.weight": "model-00033-of-00033.safetensors",
13799
+ "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00033-of-00033.safetensors",
13800
+ "model.layers.46.mlp.experts.63.up_proj.weight": "model-00033-of-00033.safetensors",
13801
+ "model.layers.46.mlp.experts.64.down_proj.weight": "model-00033-of-00033.safetensors",
13802
+ "model.layers.46.mlp.experts.64.gate_proj.weight": "model-00033-of-00033.safetensors",
13803
+ "model.layers.46.mlp.experts.64.up_proj.weight": "model-00033-of-00033.safetensors",
13804
+ "model.layers.46.mlp.experts.65.down_proj.weight": "model-00033-of-00033.safetensors",
13805
+ "model.layers.46.mlp.experts.65.gate_proj.weight": "model-00033-of-00033.safetensors",
13806
+ "model.layers.46.mlp.experts.65.up_proj.weight": "model-00033-of-00033.safetensors",
13807
+ "model.layers.46.mlp.experts.66.down_proj.weight": "model-00033-of-00033.safetensors",
13808
+ "model.layers.46.mlp.experts.66.gate_proj.weight": "model-00033-of-00033.safetensors",
13809
+ "model.layers.46.mlp.experts.66.up_proj.weight": "model-00033-of-00033.safetensors",
13810
+ "model.layers.46.mlp.experts.67.down_proj.weight": "model-00033-of-00033.safetensors",
13811
+ "model.layers.46.mlp.experts.67.gate_proj.weight": "model-00033-of-00033.safetensors",
13812
+ "model.layers.46.mlp.experts.67.up_proj.weight": "model-00033-of-00033.safetensors",
13813
+ "model.layers.46.mlp.experts.68.down_proj.weight": "model-00033-of-00033.safetensors",
13814
+ "model.layers.46.mlp.experts.68.gate_proj.weight": "model-00033-of-00033.safetensors",
13815
+ "model.layers.46.mlp.experts.68.up_proj.weight": "model-00033-of-00033.safetensors",
13816
+ "model.layers.46.mlp.experts.69.down_proj.weight": "model-00033-of-00033.safetensors",
13817
+ "model.layers.46.mlp.experts.69.gate_proj.weight": "model-00033-of-00033.safetensors",
13818
+ "model.layers.46.mlp.experts.69.up_proj.weight": "model-00033-of-00033.safetensors",
13819
+ "model.layers.46.mlp.experts.7.down_proj.weight": "model-00033-of-00033.safetensors",
13820
+ "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00033-of-00033.safetensors",
13821
+ "model.layers.46.mlp.experts.7.up_proj.weight": "model-00033-of-00033.safetensors",
13822
+ "model.layers.46.mlp.experts.70.down_proj.weight": "model-00033-of-00033.safetensors",
13823
+ "model.layers.46.mlp.experts.70.gate_proj.weight": "model-00033-of-00033.safetensors",
13824
+ "model.layers.46.mlp.experts.70.up_proj.weight": "model-00033-of-00033.safetensors",
13825
+ "model.layers.46.mlp.experts.71.down_proj.weight": "model-00033-of-00033.safetensors",
13826
+ "model.layers.46.mlp.experts.71.gate_proj.weight": "model-00033-of-00033.safetensors",
13827
+ "model.layers.46.mlp.experts.71.up_proj.weight": "model-00033-of-00033.safetensors",
13828
+ "model.layers.46.mlp.experts.72.down_proj.weight": "model-00033-of-00033.safetensors",
13829
+ "model.layers.46.mlp.experts.72.gate_proj.weight": "model-00033-of-00033.safetensors",
13830
+ "model.layers.46.mlp.experts.72.up_proj.weight": "model-00033-of-00033.safetensors",
13831
+ "model.layers.46.mlp.experts.73.down_proj.weight": "model-00033-of-00033.safetensors",
13832
+ "model.layers.46.mlp.experts.73.gate_proj.weight": "model-00033-of-00033.safetensors",
13833
+ "model.layers.46.mlp.experts.73.up_proj.weight": "model-00033-of-00033.safetensors",
13834
+ "model.layers.46.mlp.experts.74.down_proj.weight": "model-00033-of-00033.safetensors",
13835
+ "model.layers.46.mlp.experts.74.gate_proj.weight": "model-00033-of-00033.safetensors",
13836
+ "model.layers.46.mlp.experts.74.up_proj.weight": "model-00033-of-00033.safetensors",
13837
+ "model.layers.46.mlp.experts.75.down_proj.weight": "model-00033-of-00033.safetensors",
13838
+ "model.layers.46.mlp.experts.75.gate_proj.weight": "model-00033-of-00033.safetensors",
13839
+ "model.layers.46.mlp.experts.75.up_proj.weight": "model-00033-of-00033.safetensors",
13840
+ "model.layers.46.mlp.experts.76.down_proj.weight": "model-00033-of-00033.safetensors",
13841
+ "model.layers.46.mlp.experts.76.gate_proj.weight": "model-00033-of-00033.safetensors",
13842
+ "model.layers.46.mlp.experts.76.up_proj.weight": "model-00033-of-00033.safetensors",
13843
+ "model.layers.46.mlp.experts.77.down_proj.weight": "model-00033-of-00033.safetensors",
13844
+ "model.layers.46.mlp.experts.77.gate_proj.weight": "model-00033-of-00033.safetensors",
13845
+ "model.layers.46.mlp.experts.77.up_proj.weight": "model-00033-of-00033.safetensors",
13846
+ "model.layers.46.mlp.experts.78.down_proj.weight": "model-00033-of-00033.safetensors",
13847
+ "model.layers.46.mlp.experts.78.gate_proj.weight": "model-00033-of-00033.safetensors",
13848
+ "model.layers.46.mlp.experts.78.up_proj.weight": "model-00033-of-00033.safetensors",
13849
+ "model.layers.46.mlp.experts.79.down_proj.weight": "model-00033-of-00033.safetensors",
13850
+ "model.layers.46.mlp.experts.79.gate_proj.weight": "model-00033-of-00033.safetensors",
13851
+ "model.layers.46.mlp.experts.79.up_proj.weight": "model-00033-of-00033.safetensors",
13852
+ "model.layers.46.mlp.experts.8.down_proj.weight": "model-00033-of-00033.safetensors",
13853
+ "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00033-of-00033.safetensors",
13854
+ "model.layers.46.mlp.experts.8.up_proj.weight": "model-00033-of-00033.safetensors",
13855
+ "model.layers.46.mlp.experts.80.down_proj.weight": "model-00033-of-00033.safetensors",
13856
+ "model.layers.46.mlp.experts.80.gate_proj.weight": "model-00033-of-00033.safetensors",
13857
+ "model.layers.46.mlp.experts.80.up_proj.weight": "model-00033-of-00033.safetensors",
13858
+ "model.layers.46.mlp.experts.81.down_proj.weight": "model-00033-of-00033.safetensors",
13859
+ "model.layers.46.mlp.experts.81.gate_proj.weight": "model-00033-of-00033.safetensors",
13860
+ "model.layers.46.mlp.experts.81.up_proj.weight": "model-00033-of-00033.safetensors",
13861
+ "model.layers.46.mlp.experts.82.down_proj.weight": "model-00033-of-00033.safetensors",
13862
+ "model.layers.46.mlp.experts.82.gate_proj.weight": "model-00033-of-00033.safetensors",
13863
+ "model.layers.46.mlp.experts.82.up_proj.weight": "model-00033-of-00033.safetensors",
13864
+ "model.layers.46.mlp.experts.83.down_proj.weight": "model-00033-of-00033.safetensors",
13865
+ "model.layers.46.mlp.experts.83.gate_proj.weight": "model-00033-of-00033.safetensors",
13866
+ "model.layers.46.mlp.experts.83.up_proj.weight": "model-00033-of-00033.safetensors",
13867
+ "model.layers.46.mlp.experts.84.down_proj.weight": "model-00033-of-00033.safetensors",
13868
+ "model.layers.46.mlp.experts.84.gate_proj.weight": "model-00033-of-00033.safetensors",
13869
+ "model.layers.46.mlp.experts.84.up_proj.weight": "model-00033-of-00033.safetensors",
13870
+ "model.layers.46.mlp.experts.85.down_proj.weight": "model-00033-of-00033.safetensors",
13871
+ "model.layers.46.mlp.experts.85.gate_proj.weight": "model-00033-of-00033.safetensors",
13872
+ "model.layers.46.mlp.experts.85.up_proj.weight": "model-00033-of-00033.safetensors",
13873
+ "model.layers.46.mlp.experts.86.down_proj.weight": "model-00033-of-00033.safetensors",
13874
+ "model.layers.46.mlp.experts.86.gate_proj.weight": "model-00033-of-00033.safetensors",
13875
+ "model.layers.46.mlp.experts.86.up_proj.weight": "model-00033-of-00033.safetensors",
13876
+ "model.layers.46.mlp.experts.87.down_proj.weight": "model-00033-of-00033.safetensors",
13877
+ "model.layers.46.mlp.experts.87.gate_proj.weight": "model-00033-of-00033.safetensors",
13878
+ "model.layers.46.mlp.experts.87.up_proj.weight": "model-00033-of-00033.safetensors",
13879
+ "model.layers.46.mlp.experts.88.down_proj.weight": "model-00033-of-00033.safetensors",
13880
+ "model.layers.46.mlp.experts.88.gate_proj.weight": "model-00033-of-00033.safetensors",
13881
+ "model.layers.46.mlp.experts.88.up_proj.weight": "model-00033-of-00033.safetensors",
13882
+ "model.layers.46.mlp.experts.89.down_proj.weight": "model-00033-of-00033.safetensors",
13883
+ "model.layers.46.mlp.experts.89.gate_proj.weight": "model-00033-of-00033.safetensors",
13884
+ "model.layers.46.mlp.experts.89.up_proj.weight": "model-00033-of-00033.safetensors",
13885
+ "model.layers.46.mlp.experts.9.down_proj.weight": "model-00033-of-00033.safetensors",
13886
+ "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00033-of-00033.safetensors",
13887
+ "model.layers.46.mlp.experts.9.up_proj.weight": "model-00033-of-00033.safetensors",
13888
+ "model.layers.46.mlp.experts.90.down_proj.weight": "model-00033-of-00033.safetensors",
13889
+ "model.layers.46.mlp.experts.90.gate_proj.weight": "model-00033-of-00033.safetensors",
13890
+ "model.layers.46.mlp.experts.90.up_proj.weight": "model-00033-of-00033.safetensors",
13891
+ "model.layers.46.mlp.experts.91.down_proj.weight": "model-00033-of-00033.safetensors",
13892
+ "model.layers.46.mlp.experts.91.gate_proj.weight": "model-00033-of-00033.safetensors",
13893
+ "model.layers.46.mlp.experts.91.up_proj.weight": "model-00033-of-00033.safetensors",
13894
+ "model.layers.46.mlp.experts.92.down_proj.weight": "model-00033-of-00033.safetensors",
13895
+ "model.layers.46.mlp.experts.92.gate_proj.weight": "model-00033-of-00033.safetensors",
13896
+ "model.layers.46.mlp.experts.92.up_proj.weight": "model-00033-of-00033.safetensors",
13897
+ "model.layers.46.mlp.experts.93.down_proj.weight": "model-00033-of-00033.safetensors",
13898
+ "model.layers.46.mlp.experts.93.gate_proj.weight": "model-00033-of-00033.safetensors",
13899
+ "model.layers.46.mlp.experts.93.up_proj.weight": "model-00033-of-00033.safetensors",
13900
+ "model.layers.46.mlp.experts.94.down_proj.weight": "model-00033-of-00033.safetensors",
13901
+ "model.layers.46.mlp.experts.94.gate_proj.weight": "model-00033-of-00033.safetensors",
13902
+ "model.layers.46.mlp.experts.94.up_proj.weight": "model-00033-of-00033.safetensors",
13903
+ "model.layers.46.mlp.experts.95.down_proj.weight": "model-00033-of-00033.safetensors",
13904
+ "model.layers.46.mlp.experts.95.gate_proj.weight": "model-00033-of-00033.safetensors",
13905
+ "model.layers.46.mlp.experts.95.up_proj.weight": "model-00033-of-00033.safetensors",
13906
+ "model.layers.46.mlp.gate.e_score_correction_bias": "model-00033-of-00033.safetensors",
13907
+ "model.layers.46.mlp.gate.weight": "model-00033-of-00033.safetensors",
13908
+ "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00033-of-00033.safetensors",
13909
+ "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00033-of-00033.safetensors",
13910
+ "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00033-of-00033.safetensors",
13911
+ "model.layers.46.post_attention_layernorm.weight": "model-00033-of-00033.safetensors",
13912
+ "model.layers.46.self_attn.k_proj.bias": "model-00033-of-00033.safetensors",
13913
+ "model.layers.46.self_attn.k_proj.weight": "model-00033-of-00033.safetensors",
13914
+ "model.layers.46.self_attn.o_proj.weight": "model-00033-of-00033.safetensors",
13915
+ "model.layers.46.self_attn.q_proj.bias": "model-00033-of-00033.safetensors",
13916
+ "model.layers.46.self_attn.q_proj.weight": "model-00033-of-00033.safetensors",
13917
+ "model.layers.46.self_attn.v_proj.bias": "model-00033-of-00033.safetensors",
13918
+ "model.layers.46.self_attn.v_proj.weight": "model-00033-of-00033.safetensors",
13919
+ "model.layers.46.shared_head.norm.weight": "model-00033-of-00033.safetensors"
13920
  }
13921
+ }