danielhanchen commited on
Commit
e09b55c
·
verified ·
1 Parent(s): e481abd

Upload folder using huggingface_hub

Browse files
chat_template.jinja CHANGED
@@ -340,8 +340,5 @@
340
  {%- if add_generation_prompt -%}
341
  {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
342
  {{- '<|turn>model\n' -}}
343
- {%- if not enable_thinking | default(false) -%}
344
- {{- '<|channel>thought\n<channel|>' -}}
345
- {%- endif -%}
346
  {%- endif -%}
347
- {%- endif -%}
 
340
  {%- if add_generation_prompt -%}
341
  {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
342
  {{- '<|turn>model\n' -}}
 
 
 
343
  {%- endif -%}
344
+ {%- endif -%}
config.json CHANGED
@@ -60,6 +60,7 @@
60
  "quantization": {
61
  "group_size": 64,
62
  "bits": 8,
 
63
  "language_model.model.embed_tokens": {
64
  "group_size": 64,
65
  "bits": 8
@@ -84,11 +85,11 @@
84
  "group_size": 64,
85
  "bits": 8
86
  },
87
- "language_model.model.layers.0.mlp.up_proj": {
88
  "group_size": 64,
89
  "bits": 8
90
  },
91
- "language_model.model.layers.0.mlp.down_proj": {
92
  "group_size": 64,
93
  "bits": 8
94
  },
@@ -120,11 +121,11 @@
120
  "group_size": 64,
121
  "bits": 8
122
  },
123
- "language_model.model.layers.1.mlp.up_proj": {
124
  "group_size": 64,
125
  "bits": 8
126
  },
127
- "language_model.model.layers.1.mlp.down_proj": {
128
  "group_size": 64,
129
  "bits": 8
130
  },
@@ -156,11 +157,11 @@
156
  "group_size": 64,
157
  "bits": 8
158
  },
159
- "language_model.model.layers.2.mlp.up_proj": {
160
  "group_size": 64,
161
  "bits": 8
162
  },
163
- "language_model.model.layers.2.mlp.down_proj": {
164
  "group_size": 64,
165
  "bits": 8
166
  },
@@ -192,11 +193,11 @@
192
  "group_size": 64,
193
  "bits": 8
194
  },
195
- "language_model.model.layers.3.mlp.up_proj": {
196
  "group_size": 64,
197
  "bits": 8
198
  },
199
- "language_model.model.layers.3.mlp.down_proj": {
200
  "group_size": 64,
201
  "bits": 8
202
  },
@@ -228,11 +229,11 @@
228
  "group_size": 64,
229
  "bits": 8
230
  },
231
- "language_model.model.layers.4.mlp.up_proj": {
232
  "group_size": 64,
233
  "bits": 8
234
  },
235
- "language_model.model.layers.4.mlp.down_proj": {
236
  "group_size": 64,
237
  "bits": 8
238
  },
@@ -264,11 +265,11 @@
264
  "group_size": 64,
265
  "bits": 8
266
  },
267
- "language_model.model.layers.5.mlp.up_proj": {
268
  "group_size": 64,
269
  "bits": 8
270
  },
271
- "language_model.model.layers.5.mlp.down_proj": {
272
  "group_size": 64,
273
  "bits": 8
274
  },
@@ -300,11 +301,11 @@
300
  "group_size": 64,
301
  "bits": 8
302
  },
303
- "language_model.model.layers.6.mlp.up_proj": {
304
  "group_size": 64,
305
  "bits": 8
306
  },
307
- "language_model.model.layers.6.mlp.down_proj": {
308
  "group_size": 64,
309
  "bits": 8
310
  },
@@ -336,11 +337,11 @@
336
  "group_size": 64,
337
  "bits": 8
338
  },
339
- "language_model.model.layers.7.mlp.up_proj": {
340
  "group_size": 64,
341
  "bits": 8
342
  },
343
- "language_model.model.layers.7.mlp.down_proj": {
344
  "group_size": 64,
345
  "bits": 8
346
  },
@@ -372,11 +373,11 @@
372
  "group_size": 64,
373
  "bits": 8
374
  },
375
- "language_model.model.layers.8.mlp.up_proj": {
376
  "group_size": 64,
377
  "bits": 8
378
  },
379
- "language_model.model.layers.8.mlp.down_proj": {
380
  "group_size": 64,
381
  "bits": 8
382
  },
@@ -408,11 +409,11 @@
408
  "group_size": 64,
409
  "bits": 8
410
  },
411
- "language_model.model.layers.9.mlp.up_proj": {
412
  "group_size": 64,
413
  "bits": 8
414
  },
415
- "language_model.model.layers.9.mlp.down_proj": {
416
  "group_size": 64,
417
  "bits": 8
418
  },
@@ -444,11 +445,11 @@
444
  "group_size": 64,
445
  "bits": 8
446
  },
447
- "language_model.model.layers.10.mlp.up_proj": {
448
  "group_size": 64,
449
  "bits": 8
450
  },
451
- "language_model.model.layers.10.mlp.down_proj": {
452
  "group_size": 64,
453
  "bits": 8
454
  },
@@ -480,11 +481,11 @@
480
  "group_size": 64,
481
  "bits": 8
482
  },
483
- "language_model.model.layers.11.mlp.up_proj": {
484
  "group_size": 64,
485
  "bits": 8
486
  },
487
- "language_model.model.layers.11.mlp.down_proj": {
488
  "group_size": 64,
489
  "bits": 8
490
  },
@@ -516,11 +517,11 @@
516
  "group_size": 64,
517
  "bits": 8
518
  },
519
- "language_model.model.layers.12.mlp.up_proj": {
520
  "group_size": 64,
521
  "bits": 8
522
  },
523
- "language_model.model.layers.12.mlp.down_proj": {
524
  "group_size": 64,
525
  "bits": 8
526
  },
@@ -552,11 +553,11 @@
552
  "group_size": 64,
553
  "bits": 8
554
  },
555
- "language_model.model.layers.13.mlp.up_proj": {
556
  "group_size": 64,
557
  "bits": 8
558
  },
559
- "language_model.model.layers.13.mlp.down_proj": {
560
  "group_size": 64,
561
  "bits": 8
562
  },
@@ -588,11 +589,11 @@
588
  "group_size": 64,
589
  "bits": 8
590
  },
591
- "language_model.model.layers.14.mlp.up_proj": {
592
  "group_size": 64,
593
  "bits": 8
594
  },
595
- "language_model.model.layers.14.mlp.down_proj": {
596
  "group_size": 64,
597
  "bits": 8
598
  },
@@ -624,11 +625,11 @@
624
  "group_size": 64,
625
  "bits": 8
626
  },
627
- "language_model.model.layers.15.mlp.up_proj": {
628
  "group_size": 64,
629
  "bits": 8
630
  },
631
- "language_model.model.layers.15.mlp.down_proj": {
632
  "group_size": 64,
633
  "bits": 8
634
  },
@@ -660,11 +661,11 @@
660
  "group_size": 64,
661
  "bits": 8
662
  },
663
- "language_model.model.layers.16.mlp.up_proj": {
664
  "group_size": 64,
665
  "bits": 8
666
  },
667
- "language_model.model.layers.16.mlp.down_proj": {
668
  "group_size": 64,
669
  "bits": 8
670
  },
@@ -696,11 +697,11 @@
696
  "group_size": 64,
697
  "bits": 8
698
  },
699
- "language_model.model.layers.17.mlp.up_proj": {
700
  "group_size": 64,
701
  "bits": 8
702
  },
703
- "language_model.model.layers.17.mlp.down_proj": {
704
  "group_size": 64,
705
  "bits": 8
706
  },
@@ -732,11 +733,11 @@
732
  "group_size": 64,
733
  "bits": 8
734
  },
735
- "language_model.model.layers.18.mlp.up_proj": {
736
  "group_size": 64,
737
  "bits": 8
738
  },
739
- "language_model.model.layers.18.mlp.down_proj": {
740
  "group_size": 64,
741
  "bits": 8
742
  },
@@ -768,11 +769,11 @@
768
  "group_size": 64,
769
  "bits": 8
770
  },
771
- "language_model.model.layers.19.mlp.up_proj": {
772
  "group_size": 64,
773
  "bits": 8
774
  },
775
- "language_model.model.layers.19.mlp.down_proj": {
776
  "group_size": 64,
777
  "bits": 8
778
  },
@@ -804,11 +805,11 @@
804
  "group_size": 64,
805
  "bits": 8
806
  },
807
- "language_model.model.layers.20.mlp.up_proj": {
808
  "group_size": 64,
809
  "bits": 8
810
  },
811
- "language_model.model.layers.20.mlp.down_proj": {
812
  "group_size": 64,
813
  "bits": 8
814
  },
@@ -840,11 +841,11 @@
840
  "group_size": 64,
841
  "bits": 8
842
  },
843
- "language_model.model.layers.21.mlp.up_proj": {
844
  "group_size": 64,
845
  "bits": 8
846
  },
847
- "language_model.model.layers.21.mlp.down_proj": {
848
  "group_size": 64,
849
  "bits": 8
850
  },
@@ -876,11 +877,11 @@
876
  "group_size": 64,
877
  "bits": 8
878
  },
879
- "language_model.model.layers.22.mlp.up_proj": {
880
  "group_size": 64,
881
  "bits": 8
882
  },
883
- "language_model.model.layers.22.mlp.down_proj": {
884
  "group_size": 64,
885
  "bits": 8
886
  },
@@ -912,11 +913,11 @@
912
  "group_size": 64,
913
  "bits": 8
914
  },
915
- "language_model.model.layers.23.mlp.up_proj": {
916
  "group_size": 64,
917
  "bits": 8
918
  },
919
- "language_model.model.layers.23.mlp.down_proj": {
920
  "group_size": 64,
921
  "bits": 8
922
  },
@@ -948,11 +949,11 @@
948
  "group_size": 64,
949
  "bits": 8
950
  },
951
- "language_model.model.layers.24.mlp.up_proj": {
952
  "group_size": 64,
953
  "bits": 8
954
  },
955
- "language_model.model.layers.24.mlp.down_proj": {
956
  "group_size": 64,
957
  "bits": 8
958
  },
@@ -984,11 +985,11 @@
984
  "group_size": 64,
985
  "bits": 8
986
  },
987
- "language_model.model.layers.25.mlp.up_proj": {
988
  "group_size": 64,
989
  "bits": 8
990
  },
991
- "language_model.model.layers.25.mlp.down_proj": {
992
  "group_size": 64,
993
  "bits": 8
994
  },
@@ -1020,11 +1021,11 @@
1020
  "group_size": 64,
1021
  "bits": 8
1022
  },
1023
- "language_model.model.layers.26.mlp.up_proj": {
1024
  "group_size": 64,
1025
  "bits": 8
1026
  },
1027
- "language_model.model.layers.26.mlp.down_proj": {
1028
  "group_size": 64,
1029
  "bits": 8
1030
  },
@@ -1056,11 +1057,11 @@
1056
  "group_size": 64,
1057
  "bits": 8
1058
  },
1059
- "language_model.model.layers.27.mlp.up_proj": {
1060
  "group_size": 64,
1061
  "bits": 8
1062
  },
1063
- "language_model.model.layers.27.mlp.down_proj": {
1064
  "group_size": 64,
1065
  "bits": 8
1066
  },
@@ -1092,11 +1093,11 @@
1092
  "group_size": 64,
1093
  "bits": 8
1094
  },
1095
- "language_model.model.layers.28.mlp.up_proj": {
1096
  "group_size": 64,
1097
  "bits": 8
1098
  },
1099
- "language_model.model.layers.28.mlp.down_proj": {
1100
  "group_size": 64,
1101
  "bits": 8
1102
  },
@@ -1128,11 +1129,11 @@
1128
  "group_size": 64,
1129
  "bits": 8
1130
  },
1131
- "language_model.model.layers.29.mlp.up_proj": {
1132
  "group_size": 64,
1133
  "bits": 8
1134
  },
1135
- "language_model.model.layers.29.mlp.down_proj": {
1136
  "group_size": 64,
1137
  "bits": 8
1138
  },
@@ -1164,11 +1165,11 @@
1164
  "group_size": 64,
1165
  "bits": 8
1166
  },
1167
- "language_model.model.layers.30.mlp.up_proj": {
1168
  "group_size": 64,
1169
  "bits": 8
1170
  },
1171
- "language_model.model.layers.30.mlp.down_proj": {
1172
  "group_size": 64,
1173
  "bits": 8
1174
  },
@@ -1200,11 +1201,11 @@
1200
  "group_size": 64,
1201
  "bits": 8
1202
  },
1203
- "language_model.model.layers.31.mlp.up_proj": {
1204
  "group_size": 64,
1205
  "bits": 8
1206
  },
1207
- "language_model.model.layers.31.mlp.down_proj": {
1208
  "group_size": 64,
1209
  "bits": 8
1210
  },
@@ -1236,11 +1237,11 @@
1236
  "group_size": 64,
1237
  "bits": 8
1238
  },
1239
- "language_model.model.layers.32.mlp.up_proj": {
1240
  "group_size": 64,
1241
  "bits": 8
1242
  },
1243
- "language_model.model.layers.32.mlp.down_proj": {
1244
  "group_size": 64,
1245
  "bits": 8
1246
  },
@@ -1272,11 +1273,11 @@
1272
  "group_size": 64,
1273
  "bits": 8
1274
  },
1275
- "language_model.model.layers.33.mlp.up_proj": {
1276
  "group_size": 64,
1277
  "bits": 8
1278
  },
1279
- "language_model.model.layers.33.mlp.down_proj": {
1280
  "group_size": 64,
1281
  "bits": 8
1282
  },
@@ -1308,11 +1309,11 @@
1308
  "group_size": 64,
1309
  "bits": 8
1310
  },
1311
- "language_model.model.layers.34.mlp.up_proj": {
1312
  "group_size": 64,
1313
  "bits": 8
1314
  },
1315
- "language_model.model.layers.34.mlp.down_proj": {
1316
  "group_size": 64,
1317
  "bits": 8
1318
  },
@@ -1344,11 +1345,11 @@
1344
  "group_size": 64,
1345
  "bits": 8
1346
  },
1347
- "language_model.model.layers.35.mlp.up_proj": {
1348
  "group_size": 64,
1349
  "bits": 8
1350
  },
1351
- "language_model.model.layers.35.mlp.down_proj": {
1352
  "group_size": 64,
1353
  "bits": 8
1354
  },
@@ -1380,11 +1381,11 @@
1380
  "group_size": 64,
1381
  "bits": 8
1382
  },
1383
- "language_model.model.layers.36.mlp.up_proj": {
1384
  "group_size": 64,
1385
  "bits": 8
1386
  },
1387
- "language_model.model.layers.36.mlp.down_proj": {
1388
  "group_size": 64,
1389
  "bits": 8
1390
  },
@@ -1416,11 +1417,11 @@
1416
  "group_size": 64,
1417
  "bits": 8
1418
  },
1419
- "language_model.model.layers.37.mlp.up_proj": {
1420
  "group_size": 64,
1421
  "bits": 8
1422
  },
1423
- "language_model.model.layers.37.mlp.down_proj": {
1424
  "group_size": 64,
1425
  "bits": 8
1426
  },
@@ -1452,11 +1453,11 @@
1452
  "group_size": 64,
1453
  "bits": 8
1454
  },
1455
- "language_model.model.layers.38.mlp.up_proj": {
1456
  "group_size": 64,
1457
  "bits": 8
1458
  },
1459
- "language_model.model.layers.38.mlp.down_proj": {
1460
  "group_size": 64,
1461
  "bits": 8
1462
  },
@@ -1488,11 +1489,11 @@
1488
  "group_size": 64,
1489
  "bits": 8
1490
  },
1491
- "language_model.model.layers.39.mlp.up_proj": {
1492
  "group_size": 64,
1493
  "bits": 8
1494
  },
1495
- "language_model.model.layers.39.mlp.down_proj": {
1496
  "group_size": 64,
1497
  "bits": 8
1498
  },
@@ -1524,11 +1525,11 @@
1524
  "group_size": 64,
1525
  "bits": 8
1526
  },
1527
- "language_model.model.layers.40.mlp.up_proj": {
1528
  "group_size": 64,
1529
  "bits": 8
1530
  },
1531
- "language_model.model.layers.40.mlp.down_proj": {
1532
  "group_size": 64,
1533
  "bits": 8
1534
  },
@@ -1560,11 +1561,11 @@
1560
  "group_size": 64,
1561
  "bits": 8
1562
  },
1563
- "language_model.model.layers.41.mlp.up_proj": {
1564
  "group_size": 64,
1565
  "bits": 8
1566
  },
1567
- "language_model.model.layers.41.mlp.down_proj": {
1568
  "group_size": 64,
1569
  "bits": 8
1570
  },
@@ -1579,15 +1580,12 @@
1579
  "language_model.model.embed_tokens_per_layer": {
1580
  "group_size": 64,
1581
  "bits": 8
1582
- },
1583
- "language_model.model.per_layer_model_projection": {
1584
- "group_size": 64,
1585
- "bits": 8
1586
  }
1587
  },
1588
  "quantization_config": {
1589
  "group_size": 64,
1590
  "bits": 8,
 
1591
  "language_model.model.embed_tokens": {
1592
  "group_size": 64,
1593
  "bits": 8
@@ -1612,11 +1610,11 @@
1612
  "group_size": 64,
1613
  "bits": 8
1614
  },
1615
- "language_model.model.layers.0.mlp.up_proj": {
1616
  "group_size": 64,
1617
  "bits": 8
1618
  },
1619
- "language_model.model.layers.0.mlp.down_proj": {
1620
  "group_size": 64,
1621
  "bits": 8
1622
  },
@@ -1648,11 +1646,11 @@
1648
  "group_size": 64,
1649
  "bits": 8
1650
  },
1651
- "language_model.model.layers.1.mlp.up_proj": {
1652
  "group_size": 64,
1653
  "bits": 8
1654
  },
1655
- "language_model.model.layers.1.mlp.down_proj": {
1656
  "group_size": 64,
1657
  "bits": 8
1658
  },
@@ -1684,11 +1682,11 @@
1684
  "group_size": 64,
1685
  "bits": 8
1686
  },
1687
- "language_model.model.layers.2.mlp.up_proj": {
1688
  "group_size": 64,
1689
  "bits": 8
1690
  },
1691
- "language_model.model.layers.2.mlp.down_proj": {
1692
  "group_size": 64,
1693
  "bits": 8
1694
  },
@@ -1720,11 +1718,11 @@
1720
  "group_size": 64,
1721
  "bits": 8
1722
  },
1723
- "language_model.model.layers.3.mlp.up_proj": {
1724
  "group_size": 64,
1725
  "bits": 8
1726
  },
1727
- "language_model.model.layers.3.mlp.down_proj": {
1728
  "group_size": 64,
1729
  "bits": 8
1730
  },
@@ -1756,11 +1754,11 @@
1756
  "group_size": 64,
1757
  "bits": 8
1758
  },
1759
- "language_model.model.layers.4.mlp.up_proj": {
1760
  "group_size": 64,
1761
  "bits": 8
1762
  },
1763
- "language_model.model.layers.4.mlp.down_proj": {
1764
  "group_size": 64,
1765
  "bits": 8
1766
  },
@@ -1792,11 +1790,11 @@
1792
  "group_size": 64,
1793
  "bits": 8
1794
  },
1795
- "language_model.model.layers.5.mlp.up_proj": {
1796
  "group_size": 64,
1797
  "bits": 8
1798
  },
1799
- "language_model.model.layers.5.mlp.down_proj": {
1800
  "group_size": 64,
1801
  "bits": 8
1802
  },
@@ -1828,11 +1826,11 @@
1828
  "group_size": 64,
1829
  "bits": 8
1830
  },
1831
- "language_model.model.layers.6.mlp.up_proj": {
1832
  "group_size": 64,
1833
  "bits": 8
1834
  },
1835
- "language_model.model.layers.6.mlp.down_proj": {
1836
  "group_size": 64,
1837
  "bits": 8
1838
  },
@@ -1864,11 +1862,11 @@
1864
  "group_size": 64,
1865
  "bits": 8
1866
  },
1867
- "language_model.model.layers.7.mlp.up_proj": {
1868
  "group_size": 64,
1869
  "bits": 8
1870
  },
1871
- "language_model.model.layers.7.mlp.down_proj": {
1872
  "group_size": 64,
1873
  "bits": 8
1874
  },
@@ -1900,11 +1898,11 @@
1900
  "group_size": 64,
1901
  "bits": 8
1902
  },
1903
- "language_model.model.layers.8.mlp.up_proj": {
1904
  "group_size": 64,
1905
  "bits": 8
1906
  },
1907
- "language_model.model.layers.8.mlp.down_proj": {
1908
  "group_size": 64,
1909
  "bits": 8
1910
  },
@@ -1936,11 +1934,11 @@
1936
  "group_size": 64,
1937
  "bits": 8
1938
  },
1939
- "language_model.model.layers.9.mlp.up_proj": {
1940
  "group_size": 64,
1941
  "bits": 8
1942
  },
1943
- "language_model.model.layers.9.mlp.down_proj": {
1944
  "group_size": 64,
1945
  "bits": 8
1946
  },
@@ -1972,11 +1970,11 @@
1972
  "group_size": 64,
1973
  "bits": 8
1974
  },
1975
- "language_model.model.layers.10.mlp.up_proj": {
1976
  "group_size": 64,
1977
  "bits": 8
1978
  },
1979
- "language_model.model.layers.10.mlp.down_proj": {
1980
  "group_size": 64,
1981
  "bits": 8
1982
  },
@@ -2008,11 +2006,11 @@
2008
  "group_size": 64,
2009
  "bits": 8
2010
  },
2011
- "language_model.model.layers.11.mlp.up_proj": {
2012
  "group_size": 64,
2013
  "bits": 8
2014
  },
2015
- "language_model.model.layers.11.mlp.down_proj": {
2016
  "group_size": 64,
2017
  "bits": 8
2018
  },
@@ -2044,11 +2042,11 @@
2044
  "group_size": 64,
2045
  "bits": 8
2046
  },
2047
- "language_model.model.layers.12.mlp.up_proj": {
2048
  "group_size": 64,
2049
  "bits": 8
2050
  },
2051
- "language_model.model.layers.12.mlp.down_proj": {
2052
  "group_size": 64,
2053
  "bits": 8
2054
  },
@@ -2080,11 +2078,11 @@
2080
  "group_size": 64,
2081
  "bits": 8
2082
  },
2083
- "language_model.model.layers.13.mlp.up_proj": {
2084
  "group_size": 64,
2085
  "bits": 8
2086
  },
2087
- "language_model.model.layers.13.mlp.down_proj": {
2088
  "group_size": 64,
2089
  "bits": 8
2090
  },
@@ -2116,11 +2114,11 @@
2116
  "group_size": 64,
2117
  "bits": 8
2118
  },
2119
- "language_model.model.layers.14.mlp.up_proj": {
2120
  "group_size": 64,
2121
  "bits": 8
2122
  },
2123
- "language_model.model.layers.14.mlp.down_proj": {
2124
  "group_size": 64,
2125
  "bits": 8
2126
  },
@@ -2152,11 +2150,11 @@
2152
  "group_size": 64,
2153
  "bits": 8
2154
  },
2155
- "language_model.model.layers.15.mlp.up_proj": {
2156
  "group_size": 64,
2157
  "bits": 8
2158
  },
2159
- "language_model.model.layers.15.mlp.down_proj": {
2160
  "group_size": 64,
2161
  "bits": 8
2162
  },
@@ -2188,11 +2186,11 @@
2188
  "group_size": 64,
2189
  "bits": 8
2190
  },
2191
- "language_model.model.layers.16.mlp.up_proj": {
2192
  "group_size": 64,
2193
  "bits": 8
2194
  },
2195
- "language_model.model.layers.16.mlp.down_proj": {
2196
  "group_size": 64,
2197
  "bits": 8
2198
  },
@@ -2224,11 +2222,11 @@
2224
  "group_size": 64,
2225
  "bits": 8
2226
  },
2227
- "language_model.model.layers.17.mlp.up_proj": {
2228
  "group_size": 64,
2229
  "bits": 8
2230
  },
2231
- "language_model.model.layers.17.mlp.down_proj": {
2232
  "group_size": 64,
2233
  "bits": 8
2234
  },
@@ -2260,11 +2258,11 @@
2260
  "group_size": 64,
2261
  "bits": 8
2262
  },
2263
- "language_model.model.layers.18.mlp.up_proj": {
2264
  "group_size": 64,
2265
  "bits": 8
2266
  },
2267
- "language_model.model.layers.18.mlp.down_proj": {
2268
  "group_size": 64,
2269
  "bits": 8
2270
  },
@@ -2296,11 +2294,11 @@
2296
  "group_size": 64,
2297
  "bits": 8
2298
  },
2299
- "language_model.model.layers.19.mlp.up_proj": {
2300
  "group_size": 64,
2301
  "bits": 8
2302
  },
2303
- "language_model.model.layers.19.mlp.down_proj": {
2304
  "group_size": 64,
2305
  "bits": 8
2306
  },
@@ -2332,11 +2330,11 @@
2332
  "group_size": 64,
2333
  "bits": 8
2334
  },
2335
- "language_model.model.layers.20.mlp.up_proj": {
2336
  "group_size": 64,
2337
  "bits": 8
2338
  },
2339
- "language_model.model.layers.20.mlp.down_proj": {
2340
  "group_size": 64,
2341
  "bits": 8
2342
  },
@@ -2368,11 +2366,11 @@
2368
  "group_size": 64,
2369
  "bits": 8
2370
  },
2371
- "language_model.model.layers.21.mlp.up_proj": {
2372
  "group_size": 64,
2373
  "bits": 8
2374
  },
2375
- "language_model.model.layers.21.mlp.down_proj": {
2376
  "group_size": 64,
2377
  "bits": 8
2378
  },
@@ -2404,11 +2402,11 @@
2404
  "group_size": 64,
2405
  "bits": 8
2406
  },
2407
- "language_model.model.layers.22.mlp.up_proj": {
2408
  "group_size": 64,
2409
  "bits": 8
2410
  },
2411
- "language_model.model.layers.22.mlp.down_proj": {
2412
  "group_size": 64,
2413
  "bits": 8
2414
  },
@@ -2440,11 +2438,11 @@
2440
  "group_size": 64,
2441
  "bits": 8
2442
  },
2443
- "language_model.model.layers.23.mlp.up_proj": {
2444
  "group_size": 64,
2445
  "bits": 8
2446
  },
2447
- "language_model.model.layers.23.mlp.down_proj": {
2448
  "group_size": 64,
2449
  "bits": 8
2450
  },
@@ -2476,11 +2474,11 @@
2476
  "group_size": 64,
2477
  "bits": 8
2478
  },
2479
- "language_model.model.layers.24.mlp.up_proj": {
2480
  "group_size": 64,
2481
  "bits": 8
2482
  },
2483
- "language_model.model.layers.24.mlp.down_proj": {
2484
  "group_size": 64,
2485
  "bits": 8
2486
  },
@@ -2512,11 +2510,11 @@
2512
  "group_size": 64,
2513
  "bits": 8
2514
  },
2515
- "language_model.model.layers.25.mlp.up_proj": {
2516
  "group_size": 64,
2517
  "bits": 8
2518
  },
2519
- "language_model.model.layers.25.mlp.down_proj": {
2520
  "group_size": 64,
2521
  "bits": 8
2522
  },
@@ -2548,11 +2546,11 @@
2548
  "group_size": 64,
2549
  "bits": 8
2550
  },
2551
- "language_model.model.layers.26.mlp.up_proj": {
2552
  "group_size": 64,
2553
  "bits": 8
2554
  },
2555
- "language_model.model.layers.26.mlp.down_proj": {
2556
  "group_size": 64,
2557
  "bits": 8
2558
  },
@@ -2584,11 +2582,11 @@
2584
  "group_size": 64,
2585
  "bits": 8
2586
  },
2587
- "language_model.model.layers.27.mlp.up_proj": {
2588
  "group_size": 64,
2589
  "bits": 8
2590
  },
2591
- "language_model.model.layers.27.mlp.down_proj": {
2592
  "group_size": 64,
2593
  "bits": 8
2594
  },
@@ -2620,11 +2618,11 @@
2620
  "group_size": 64,
2621
  "bits": 8
2622
  },
2623
- "language_model.model.layers.28.mlp.up_proj": {
2624
  "group_size": 64,
2625
  "bits": 8
2626
  },
2627
- "language_model.model.layers.28.mlp.down_proj": {
2628
  "group_size": 64,
2629
  "bits": 8
2630
  },
@@ -2656,11 +2654,11 @@
2656
  "group_size": 64,
2657
  "bits": 8
2658
  },
2659
- "language_model.model.layers.29.mlp.up_proj": {
2660
  "group_size": 64,
2661
  "bits": 8
2662
  },
2663
- "language_model.model.layers.29.mlp.down_proj": {
2664
  "group_size": 64,
2665
  "bits": 8
2666
  },
@@ -2692,11 +2690,11 @@
2692
  "group_size": 64,
2693
  "bits": 8
2694
  },
2695
- "language_model.model.layers.30.mlp.up_proj": {
2696
  "group_size": 64,
2697
  "bits": 8
2698
  },
2699
- "language_model.model.layers.30.mlp.down_proj": {
2700
  "group_size": 64,
2701
  "bits": 8
2702
  },
@@ -2728,11 +2726,11 @@
2728
  "group_size": 64,
2729
  "bits": 8
2730
  },
2731
- "language_model.model.layers.31.mlp.up_proj": {
2732
  "group_size": 64,
2733
  "bits": 8
2734
  },
2735
- "language_model.model.layers.31.mlp.down_proj": {
2736
  "group_size": 64,
2737
  "bits": 8
2738
  },
@@ -2764,11 +2762,11 @@
2764
  "group_size": 64,
2765
  "bits": 8
2766
  },
2767
- "language_model.model.layers.32.mlp.up_proj": {
2768
  "group_size": 64,
2769
  "bits": 8
2770
  },
2771
- "language_model.model.layers.32.mlp.down_proj": {
2772
  "group_size": 64,
2773
  "bits": 8
2774
  },
@@ -2800,11 +2798,11 @@
2800
  "group_size": 64,
2801
  "bits": 8
2802
  },
2803
- "language_model.model.layers.33.mlp.up_proj": {
2804
  "group_size": 64,
2805
  "bits": 8
2806
  },
2807
- "language_model.model.layers.33.mlp.down_proj": {
2808
  "group_size": 64,
2809
  "bits": 8
2810
  },
@@ -2836,11 +2834,11 @@
2836
  "group_size": 64,
2837
  "bits": 8
2838
  },
2839
- "language_model.model.layers.34.mlp.up_proj": {
2840
  "group_size": 64,
2841
  "bits": 8
2842
  },
2843
- "language_model.model.layers.34.mlp.down_proj": {
2844
  "group_size": 64,
2845
  "bits": 8
2846
  },
@@ -2872,11 +2870,11 @@
2872
  "group_size": 64,
2873
  "bits": 8
2874
  },
2875
- "language_model.model.layers.35.mlp.up_proj": {
2876
  "group_size": 64,
2877
  "bits": 8
2878
  },
2879
- "language_model.model.layers.35.mlp.down_proj": {
2880
  "group_size": 64,
2881
  "bits": 8
2882
  },
@@ -2908,11 +2906,11 @@
2908
  "group_size": 64,
2909
  "bits": 8
2910
  },
2911
- "language_model.model.layers.36.mlp.up_proj": {
2912
  "group_size": 64,
2913
  "bits": 8
2914
  },
2915
- "language_model.model.layers.36.mlp.down_proj": {
2916
  "group_size": 64,
2917
  "bits": 8
2918
  },
@@ -2944,11 +2942,11 @@
2944
  "group_size": 64,
2945
  "bits": 8
2946
  },
2947
- "language_model.model.layers.37.mlp.up_proj": {
2948
  "group_size": 64,
2949
  "bits": 8
2950
  },
2951
- "language_model.model.layers.37.mlp.down_proj": {
2952
  "group_size": 64,
2953
  "bits": 8
2954
  },
@@ -2980,11 +2978,11 @@
2980
  "group_size": 64,
2981
  "bits": 8
2982
  },
2983
- "language_model.model.layers.38.mlp.up_proj": {
2984
  "group_size": 64,
2985
  "bits": 8
2986
  },
2987
- "language_model.model.layers.38.mlp.down_proj": {
2988
  "group_size": 64,
2989
  "bits": 8
2990
  },
@@ -3016,11 +3014,11 @@
3016
  "group_size": 64,
3017
  "bits": 8
3018
  },
3019
- "language_model.model.layers.39.mlp.up_proj": {
3020
  "group_size": 64,
3021
  "bits": 8
3022
  },
3023
- "language_model.model.layers.39.mlp.down_proj": {
3024
  "group_size": 64,
3025
  "bits": 8
3026
  },
@@ -3052,11 +3050,11 @@
3052
  "group_size": 64,
3053
  "bits": 8
3054
  },
3055
- "language_model.model.layers.40.mlp.up_proj": {
3056
  "group_size": 64,
3057
  "bits": 8
3058
  },
3059
- "language_model.model.layers.40.mlp.down_proj": {
3060
  "group_size": 64,
3061
  "bits": 8
3062
  },
@@ -3088,11 +3086,11 @@
3088
  "group_size": 64,
3089
  "bits": 8
3090
  },
3091
- "language_model.model.layers.41.mlp.up_proj": {
3092
  "group_size": 64,
3093
  "bits": 8
3094
  },
3095
- "language_model.model.layers.41.mlp.down_proj": {
3096
  "group_size": 64,
3097
  "bits": 8
3098
  },
@@ -3107,10 +3105,6 @@
3107
  "language_model.model.embed_tokens_per_layer": {
3108
  "group_size": 64,
3109
  "bits": 8
3110
- },
3111
- "language_model.model.per_layer_model_projection": {
3112
- "group_size": 64,
3113
- "bits": 8
3114
  }
3115
  },
3116
  "text_config": {
@@ -3207,5 +3201,48 @@
3207
  "tie_word_embeddings": true,
3208
  "transformers_version": "5.5.0.dev0",
3209
  "video_token_id": 258884,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3210
  "vision_soft_tokens_per_image": 280
3211
  }
 
60
  "quantization": {
61
  "group_size": 64,
62
  "bits": 8,
63
+ "mode": "affine",
64
  "language_model.model.embed_tokens": {
65
  "group_size": 64,
66
  "bits": 8
 
85
  "group_size": 64,
86
  "bits": 8
87
  },
88
+ "language_model.model.layers.0.mlp.down_proj": {
89
  "group_size": 64,
90
  "bits": 8
91
  },
92
+ "language_model.model.layers.0.mlp.up_proj": {
93
  "group_size": 64,
94
  "bits": 8
95
  },
 
121
  "group_size": 64,
122
  "bits": 8
123
  },
124
+ "language_model.model.layers.1.mlp.down_proj": {
125
  "group_size": 64,
126
  "bits": 8
127
  },
128
+ "language_model.model.layers.1.mlp.up_proj": {
129
  "group_size": 64,
130
  "bits": 8
131
  },
 
157
  "group_size": 64,
158
  "bits": 8
159
  },
160
+ "language_model.model.layers.2.mlp.down_proj": {
161
  "group_size": 64,
162
  "bits": 8
163
  },
164
+ "language_model.model.layers.2.mlp.up_proj": {
165
  "group_size": 64,
166
  "bits": 8
167
  },
 
193
  "group_size": 64,
194
  "bits": 8
195
  },
196
+ "language_model.model.layers.3.mlp.down_proj": {
197
  "group_size": 64,
198
  "bits": 8
199
  },
200
+ "language_model.model.layers.3.mlp.up_proj": {
201
  "group_size": 64,
202
  "bits": 8
203
  },
 
229
  "group_size": 64,
230
  "bits": 8
231
  },
232
+ "language_model.model.layers.4.mlp.down_proj": {
233
  "group_size": 64,
234
  "bits": 8
235
  },
236
+ "language_model.model.layers.4.mlp.up_proj": {
237
  "group_size": 64,
238
  "bits": 8
239
  },
 
265
  "group_size": 64,
266
  "bits": 8
267
  },
268
+ "language_model.model.layers.5.mlp.down_proj": {
269
  "group_size": 64,
270
  "bits": 8
271
  },
272
+ "language_model.model.layers.5.mlp.up_proj": {
273
  "group_size": 64,
274
  "bits": 8
275
  },
 
301
  "group_size": 64,
302
  "bits": 8
303
  },
304
+ "language_model.model.layers.6.mlp.down_proj": {
305
  "group_size": 64,
306
  "bits": 8
307
  },
308
+ "language_model.model.layers.6.mlp.up_proj": {
309
  "group_size": 64,
310
  "bits": 8
311
  },
 
337
  "group_size": 64,
338
  "bits": 8
339
  },
340
+ "language_model.model.layers.7.mlp.down_proj": {
341
  "group_size": 64,
342
  "bits": 8
343
  },
344
+ "language_model.model.layers.7.mlp.up_proj": {
345
  "group_size": 64,
346
  "bits": 8
347
  },
 
373
  "group_size": 64,
374
  "bits": 8
375
  },
376
+ "language_model.model.layers.8.mlp.down_proj": {
377
  "group_size": 64,
378
  "bits": 8
379
  },
380
+ "language_model.model.layers.8.mlp.up_proj": {
381
  "group_size": 64,
382
  "bits": 8
383
  },
 
409
  "group_size": 64,
410
  "bits": 8
411
  },
412
+ "language_model.model.layers.9.mlp.down_proj": {
413
  "group_size": 64,
414
  "bits": 8
415
  },
416
+ "language_model.model.layers.9.mlp.up_proj": {
417
  "group_size": 64,
418
  "bits": 8
419
  },
 
445
  "group_size": 64,
446
  "bits": 8
447
  },
448
+ "language_model.model.layers.10.mlp.down_proj": {
449
  "group_size": 64,
450
  "bits": 8
451
  },
452
+ "language_model.model.layers.10.mlp.up_proj": {
453
  "group_size": 64,
454
  "bits": 8
455
  },
 
481
  "group_size": 64,
482
  "bits": 8
483
  },
484
+ "language_model.model.layers.11.mlp.down_proj": {
485
  "group_size": 64,
486
  "bits": 8
487
  },
488
+ "language_model.model.layers.11.mlp.up_proj": {
489
  "group_size": 64,
490
  "bits": 8
491
  },
 
517
  "group_size": 64,
518
  "bits": 8
519
  },
520
+ "language_model.model.layers.12.mlp.down_proj": {
521
  "group_size": 64,
522
  "bits": 8
523
  },
524
+ "language_model.model.layers.12.mlp.up_proj": {
525
  "group_size": 64,
526
  "bits": 8
527
  },
 
553
  "group_size": 64,
554
  "bits": 8
555
  },
556
+ "language_model.model.layers.13.mlp.down_proj": {
557
  "group_size": 64,
558
  "bits": 8
559
  },
560
+ "language_model.model.layers.13.mlp.up_proj": {
561
  "group_size": 64,
562
  "bits": 8
563
  },
 
589
  "group_size": 64,
590
  "bits": 8
591
  },
592
+ "language_model.model.layers.14.mlp.down_proj": {
593
  "group_size": 64,
594
  "bits": 8
595
  },
596
+ "language_model.model.layers.14.mlp.up_proj": {
597
  "group_size": 64,
598
  "bits": 8
599
  },
 
625
  "group_size": 64,
626
  "bits": 8
627
  },
628
+ "language_model.model.layers.15.mlp.down_proj": {
629
  "group_size": 64,
630
  "bits": 8
631
  },
632
+ "language_model.model.layers.15.mlp.up_proj": {
633
  "group_size": 64,
634
  "bits": 8
635
  },
 
661
  "group_size": 64,
662
  "bits": 8
663
  },
664
+ "language_model.model.layers.16.mlp.down_proj": {
665
  "group_size": 64,
666
  "bits": 8
667
  },
668
+ "language_model.model.layers.16.mlp.up_proj": {
669
  "group_size": 64,
670
  "bits": 8
671
  },
 
697
  "group_size": 64,
698
  "bits": 8
699
  },
700
+ "language_model.model.layers.17.mlp.down_proj": {
701
  "group_size": 64,
702
  "bits": 8
703
  },
704
+ "language_model.model.layers.17.mlp.up_proj": {
705
  "group_size": 64,
706
  "bits": 8
707
  },
 
733
  "group_size": 64,
734
  "bits": 8
735
  },
736
+ "language_model.model.layers.18.mlp.down_proj": {
737
  "group_size": 64,
738
  "bits": 8
739
  },
740
+ "language_model.model.layers.18.mlp.up_proj": {
741
  "group_size": 64,
742
  "bits": 8
743
  },
 
769
  "group_size": 64,
770
  "bits": 8
771
  },
772
+ "language_model.model.layers.19.mlp.down_proj": {
773
  "group_size": 64,
774
  "bits": 8
775
  },
776
+ "language_model.model.layers.19.mlp.up_proj": {
777
  "group_size": 64,
778
  "bits": 8
779
  },
 
805
  "group_size": 64,
806
  "bits": 8
807
  },
808
+ "language_model.model.layers.20.mlp.down_proj": {
809
  "group_size": 64,
810
  "bits": 8
811
  },
812
+ "language_model.model.layers.20.mlp.up_proj": {
813
  "group_size": 64,
814
  "bits": 8
815
  },
 
841
  "group_size": 64,
842
  "bits": 8
843
  },
844
+ "language_model.model.layers.21.mlp.down_proj": {
845
  "group_size": 64,
846
  "bits": 8
847
  },
848
+ "language_model.model.layers.21.mlp.up_proj": {
849
  "group_size": 64,
850
  "bits": 8
851
  },
 
877
  "group_size": 64,
878
  "bits": 8
879
  },
880
+ "language_model.model.layers.22.mlp.down_proj": {
881
  "group_size": 64,
882
  "bits": 8
883
  },
884
+ "language_model.model.layers.22.mlp.up_proj": {
885
  "group_size": 64,
886
  "bits": 8
887
  },
 
913
  "group_size": 64,
914
  "bits": 8
915
  },
916
+ "language_model.model.layers.23.mlp.down_proj": {
917
  "group_size": 64,
918
  "bits": 8
919
  },
920
+ "language_model.model.layers.23.mlp.up_proj": {
921
  "group_size": 64,
922
  "bits": 8
923
  },
 
949
  "group_size": 64,
950
  "bits": 8
951
  },
952
+ "language_model.model.layers.24.mlp.down_proj": {
953
  "group_size": 64,
954
  "bits": 8
955
  },
956
+ "language_model.model.layers.24.mlp.up_proj": {
957
  "group_size": 64,
958
  "bits": 8
959
  },
 
985
  "group_size": 64,
986
  "bits": 8
987
  },
988
+ "language_model.model.layers.25.mlp.down_proj": {
989
  "group_size": 64,
990
  "bits": 8
991
  },
992
+ "language_model.model.layers.25.mlp.up_proj": {
993
  "group_size": 64,
994
  "bits": 8
995
  },
 
1021
  "group_size": 64,
1022
  "bits": 8
1023
  },
1024
+ "language_model.model.layers.26.mlp.down_proj": {
1025
  "group_size": 64,
1026
  "bits": 8
1027
  },
1028
+ "language_model.model.layers.26.mlp.up_proj": {
1029
  "group_size": 64,
1030
  "bits": 8
1031
  },
 
1057
  "group_size": 64,
1058
  "bits": 8
1059
  },
1060
+ "language_model.model.layers.27.mlp.down_proj": {
1061
  "group_size": 64,
1062
  "bits": 8
1063
  },
1064
+ "language_model.model.layers.27.mlp.up_proj": {
1065
  "group_size": 64,
1066
  "bits": 8
1067
  },
 
1093
  "group_size": 64,
1094
  "bits": 8
1095
  },
1096
+ "language_model.model.layers.28.mlp.down_proj": {
1097
  "group_size": 64,
1098
  "bits": 8
1099
  },
1100
+ "language_model.model.layers.28.mlp.up_proj": {
1101
  "group_size": 64,
1102
  "bits": 8
1103
  },
 
1129
  "group_size": 64,
1130
  "bits": 8
1131
  },
1132
+ "language_model.model.layers.29.mlp.down_proj": {
1133
  "group_size": 64,
1134
  "bits": 8
1135
  },
1136
+ "language_model.model.layers.29.mlp.up_proj": {
1137
  "group_size": 64,
1138
  "bits": 8
1139
  },
 
1165
  "group_size": 64,
1166
  "bits": 8
1167
  },
1168
+ "language_model.model.layers.30.mlp.down_proj": {
1169
  "group_size": 64,
1170
  "bits": 8
1171
  },
1172
+ "language_model.model.layers.30.mlp.up_proj": {
1173
  "group_size": 64,
1174
  "bits": 8
1175
  },
 
1201
  "group_size": 64,
1202
  "bits": 8
1203
  },
1204
+ "language_model.model.layers.31.mlp.down_proj": {
1205
  "group_size": 64,
1206
  "bits": 8
1207
  },
1208
+ "language_model.model.layers.31.mlp.up_proj": {
1209
  "group_size": 64,
1210
  "bits": 8
1211
  },
 
1237
  "group_size": 64,
1238
  "bits": 8
1239
  },
1240
+ "language_model.model.layers.32.mlp.down_proj": {
1241
  "group_size": 64,
1242
  "bits": 8
1243
  },
1244
+ "language_model.model.layers.32.mlp.up_proj": {
1245
  "group_size": 64,
1246
  "bits": 8
1247
  },
 
1273
  "group_size": 64,
1274
  "bits": 8
1275
  },
1276
+ "language_model.model.layers.33.mlp.down_proj": {
1277
  "group_size": 64,
1278
  "bits": 8
1279
  },
1280
+ "language_model.model.layers.33.mlp.up_proj": {
1281
  "group_size": 64,
1282
  "bits": 8
1283
  },
 
1309
  "group_size": 64,
1310
  "bits": 8
1311
  },
1312
+ "language_model.model.layers.34.mlp.down_proj": {
1313
  "group_size": 64,
1314
  "bits": 8
1315
  },
1316
+ "language_model.model.layers.34.mlp.up_proj": {
1317
  "group_size": 64,
1318
  "bits": 8
1319
  },
 
1345
  "group_size": 64,
1346
  "bits": 8
1347
  },
1348
+ "language_model.model.layers.35.mlp.down_proj": {
1349
  "group_size": 64,
1350
  "bits": 8
1351
  },
1352
+ "language_model.model.layers.35.mlp.up_proj": {
1353
  "group_size": 64,
1354
  "bits": 8
1355
  },
 
1381
  "group_size": 64,
1382
  "bits": 8
1383
  },
1384
+ "language_model.model.layers.36.mlp.down_proj": {
1385
  "group_size": 64,
1386
  "bits": 8
1387
  },
1388
+ "language_model.model.layers.36.mlp.up_proj": {
1389
  "group_size": 64,
1390
  "bits": 8
1391
  },
 
1417
  "group_size": 64,
1418
  "bits": 8
1419
  },
1420
+ "language_model.model.layers.37.mlp.down_proj": {
1421
  "group_size": 64,
1422
  "bits": 8
1423
  },
1424
+ "language_model.model.layers.37.mlp.up_proj": {
1425
  "group_size": 64,
1426
  "bits": 8
1427
  },
 
1453
  "group_size": 64,
1454
  "bits": 8
1455
  },
1456
+ "language_model.model.layers.38.mlp.down_proj": {
1457
  "group_size": 64,
1458
  "bits": 8
1459
  },
1460
+ "language_model.model.layers.38.mlp.up_proj": {
1461
  "group_size": 64,
1462
  "bits": 8
1463
  },
 
1489
  "group_size": 64,
1490
  "bits": 8
1491
  },
1492
+ "language_model.model.layers.39.mlp.down_proj": {
1493
  "group_size": 64,
1494
  "bits": 8
1495
  },
1496
+ "language_model.model.layers.39.mlp.up_proj": {
1497
  "group_size": 64,
1498
  "bits": 8
1499
  },
 
1525
  "group_size": 64,
1526
  "bits": 8
1527
  },
1528
+ "language_model.model.layers.40.mlp.down_proj": {
1529
  "group_size": 64,
1530
  "bits": 8
1531
  },
1532
+ "language_model.model.layers.40.mlp.up_proj": {
1533
  "group_size": 64,
1534
  "bits": 8
1535
  },
 
1561
  "group_size": 64,
1562
  "bits": 8
1563
  },
1564
+ "language_model.model.layers.41.mlp.down_proj": {
1565
  "group_size": 64,
1566
  "bits": 8
1567
  },
1568
+ "language_model.model.layers.41.mlp.up_proj": {
1569
  "group_size": 64,
1570
  "bits": 8
1571
  },
 
1580
  "language_model.model.embed_tokens_per_layer": {
1581
  "group_size": 64,
1582
  "bits": 8
 
 
 
 
1583
  }
1584
  },
1585
  "quantization_config": {
1586
  "group_size": 64,
1587
  "bits": 8,
1588
+ "mode": "affine",
1589
  "language_model.model.embed_tokens": {
1590
  "group_size": 64,
1591
  "bits": 8
 
1610
  "group_size": 64,
1611
  "bits": 8
1612
  },
1613
+ "language_model.model.layers.0.mlp.down_proj": {
1614
  "group_size": 64,
1615
  "bits": 8
1616
  },
1617
+ "language_model.model.layers.0.mlp.up_proj": {
1618
  "group_size": 64,
1619
  "bits": 8
1620
  },
 
1646
  "group_size": 64,
1647
  "bits": 8
1648
  },
1649
+ "language_model.model.layers.1.mlp.down_proj": {
1650
  "group_size": 64,
1651
  "bits": 8
1652
  },
1653
+ "language_model.model.layers.1.mlp.up_proj": {
1654
  "group_size": 64,
1655
  "bits": 8
1656
  },
 
1682
  "group_size": 64,
1683
  "bits": 8
1684
  },
1685
+ "language_model.model.layers.2.mlp.down_proj": {
1686
  "group_size": 64,
1687
  "bits": 8
1688
  },
1689
+ "language_model.model.layers.2.mlp.up_proj": {
1690
  "group_size": 64,
1691
  "bits": 8
1692
  },
 
1718
  "group_size": 64,
1719
  "bits": 8
1720
  },
1721
+ "language_model.model.layers.3.mlp.down_proj": {
1722
  "group_size": 64,
1723
  "bits": 8
1724
  },
1725
+ "language_model.model.layers.3.mlp.up_proj": {
1726
  "group_size": 64,
1727
  "bits": 8
1728
  },
 
1754
  "group_size": 64,
1755
  "bits": 8
1756
  },
1757
+ "language_model.model.layers.4.mlp.down_proj": {
1758
  "group_size": 64,
1759
  "bits": 8
1760
  },
1761
+ "language_model.model.layers.4.mlp.up_proj": {
1762
  "group_size": 64,
1763
  "bits": 8
1764
  },
 
1790
  "group_size": 64,
1791
  "bits": 8
1792
  },
1793
+ "language_model.model.layers.5.mlp.down_proj": {
1794
  "group_size": 64,
1795
  "bits": 8
1796
  },
1797
+ "language_model.model.layers.5.mlp.up_proj": {
1798
  "group_size": 64,
1799
  "bits": 8
1800
  },
 
1826
  "group_size": 64,
1827
  "bits": 8
1828
  },
1829
+ "language_model.model.layers.6.mlp.down_proj": {
1830
  "group_size": 64,
1831
  "bits": 8
1832
  },
1833
+ "language_model.model.layers.6.mlp.up_proj": {
1834
  "group_size": 64,
1835
  "bits": 8
1836
  },
 
1862
  "group_size": 64,
1863
  "bits": 8
1864
  },
1865
+ "language_model.model.layers.7.mlp.down_proj": {
1866
  "group_size": 64,
1867
  "bits": 8
1868
  },
1869
+ "language_model.model.layers.7.mlp.up_proj": {
1870
  "group_size": 64,
1871
  "bits": 8
1872
  },
 
1898
  "group_size": 64,
1899
  "bits": 8
1900
  },
1901
+ "language_model.model.layers.8.mlp.down_proj": {
1902
  "group_size": 64,
1903
  "bits": 8
1904
  },
1905
+ "language_model.model.layers.8.mlp.up_proj": {
1906
  "group_size": 64,
1907
  "bits": 8
1908
  },
 
1934
  "group_size": 64,
1935
  "bits": 8
1936
  },
1937
+ "language_model.model.layers.9.mlp.down_proj": {
1938
  "group_size": 64,
1939
  "bits": 8
1940
  },
1941
+ "language_model.model.layers.9.mlp.up_proj": {
1942
  "group_size": 64,
1943
  "bits": 8
1944
  },
 
1970
  "group_size": 64,
1971
  "bits": 8
1972
  },
1973
+ "language_model.model.layers.10.mlp.down_proj": {
1974
  "group_size": 64,
1975
  "bits": 8
1976
  },
1977
+ "language_model.model.layers.10.mlp.up_proj": {
1978
  "group_size": 64,
1979
  "bits": 8
1980
  },
 
2006
  "group_size": 64,
2007
  "bits": 8
2008
  },
2009
+ "language_model.model.layers.11.mlp.down_proj": {
2010
  "group_size": 64,
2011
  "bits": 8
2012
  },
2013
+ "language_model.model.layers.11.mlp.up_proj": {
2014
  "group_size": 64,
2015
  "bits": 8
2016
  },
 
2042
  "group_size": 64,
2043
  "bits": 8
2044
  },
2045
+ "language_model.model.layers.12.mlp.down_proj": {
2046
  "group_size": 64,
2047
  "bits": 8
2048
  },
2049
+ "language_model.model.layers.12.mlp.up_proj": {
2050
  "group_size": 64,
2051
  "bits": 8
2052
  },
 
2078
  "group_size": 64,
2079
  "bits": 8
2080
  },
2081
+ "language_model.model.layers.13.mlp.down_proj": {
2082
  "group_size": 64,
2083
  "bits": 8
2084
  },
2085
+ "language_model.model.layers.13.mlp.up_proj": {
2086
  "group_size": 64,
2087
  "bits": 8
2088
  },
 
2114
  "group_size": 64,
2115
  "bits": 8
2116
  },
2117
+ "language_model.model.layers.14.mlp.down_proj": {
2118
  "group_size": 64,
2119
  "bits": 8
2120
  },
2121
+ "language_model.model.layers.14.mlp.up_proj": {
2122
  "group_size": 64,
2123
  "bits": 8
2124
  },
 
2150
  "group_size": 64,
2151
  "bits": 8
2152
  },
2153
+ "language_model.model.layers.15.mlp.down_proj": {
2154
  "group_size": 64,
2155
  "bits": 8
2156
  },
2157
+ "language_model.model.layers.15.mlp.up_proj": {
2158
  "group_size": 64,
2159
  "bits": 8
2160
  },
 
2186
  "group_size": 64,
2187
  "bits": 8
2188
  },
2189
+ "language_model.model.layers.16.mlp.down_proj": {
2190
  "group_size": 64,
2191
  "bits": 8
2192
  },
2193
+ "language_model.model.layers.16.mlp.up_proj": {
2194
  "group_size": 64,
2195
  "bits": 8
2196
  },
 
2222
  "group_size": 64,
2223
  "bits": 8
2224
  },
2225
+ "language_model.model.layers.17.mlp.down_proj": {
2226
  "group_size": 64,
2227
  "bits": 8
2228
  },
2229
+ "language_model.model.layers.17.mlp.up_proj": {
2230
  "group_size": 64,
2231
  "bits": 8
2232
  },
 
2258
  "group_size": 64,
2259
  "bits": 8
2260
  },
2261
+ "language_model.model.layers.18.mlp.down_proj": {
2262
  "group_size": 64,
2263
  "bits": 8
2264
  },
2265
+ "language_model.model.layers.18.mlp.up_proj": {
2266
  "group_size": 64,
2267
  "bits": 8
2268
  },
 
2294
  "group_size": 64,
2295
  "bits": 8
2296
  },
2297
+ "language_model.model.layers.19.mlp.down_proj": {
2298
  "group_size": 64,
2299
  "bits": 8
2300
  },
2301
+ "language_model.model.layers.19.mlp.up_proj": {
2302
  "group_size": 64,
2303
  "bits": 8
2304
  },
 
2330
  "group_size": 64,
2331
  "bits": 8
2332
  },
2333
+ "language_model.model.layers.20.mlp.down_proj": {
2334
  "group_size": 64,
2335
  "bits": 8
2336
  },
2337
+ "language_model.model.layers.20.mlp.up_proj": {
2338
  "group_size": 64,
2339
  "bits": 8
2340
  },
 
2366
  "group_size": 64,
2367
  "bits": 8
2368
  },
2369
+ "language_model.model.layers.21.mlp.down_proj": {
2370
  "group_size": 64,
2371
  "bits": 8
2372
  },
2373
+ "language_model.model.layers.21.mlp.up_proj": {
2374
  "group_size": 64,
2375
  "bits": 8
2376
  },
 
2402
  "group_size": 64,
2403
  "bits": 8
2404
  },
2405
+ "language_model.model.layers.22.mlp.down_proj": {
2406
  "group_size": 64,
2407
  "bits": 8
2408
  },
2409
+ "language_model.model.layers.22.mlp.up_proj": {
2410
  "group_size": 64,
2411
  "bits": 8
2412
  },
 
2438
  "group_size": 64,
2439
  "bits": 8
2440
  },
2441
+ "language_model.model.layers.23.mlp.down_proj": {
2442
  "group_size": 64,
2443
  "bits": 8
2444
  },
2445
+ "language_model.model.layers.23.mlp.up_proj": {
2446
  "group_size": 64,
2447
  "bits": 8
2448
  },
 
2474
  "group_size": 64,
2475
  "bits": 8
2476
  },
2477
+ "language_model.model.layers.24.mlp.down_proj": {
2478
  "group_size": 64,
2479
  "bits": 8
2480
  },
2481
+ "language_model.model.layers.24.mlp.up_proj": {
2482
  "group_size": 64,
2483
  "bits": 8
2484
  },
 
2510
  "group_size": 64,
2511
  "bits": 8
2512
  },
2513
+ "language_model.model.layers.25.mlp.down_proj": {
2514
  "group_size": 64,
2515
  "bits": 8
2516
  },
2517
+ "language_model.model.layers.25.mlp.up_proj": {
2518
  "group_size": 64,
2519
  "bits": 8
2520
  },
 
2546
  "group_size": 64,
2547
  "bits": 8
2548
  },
2549
+ "language_model.model.layers.26.mlp.down_proj": {
2550
  "group_size": 64,
2551
  "bits": 8
2552
  },
2553
+ "language_model.model.layers.26.mlp.up_proj": {
2554
  "group_size": 64,
2555
  "bits": 8
2556
  },
 
2582
  "group_size": 64,
2583
  "bits": 8
2584
  },
2585
+ "language_model.model.layers.27.mlp.down_proj": {
2586
  "group_size": 64,
2587
  "bits": 8
2588
  },
2589
+ "language_model.model.layers.27.mlp.up_proj": {
2590
  "group_size": 64,
2591
  "bits": 8
2592
  },
 
2618
  "group_size": 64,
2619
  "bits": 8
2620
  },
2621
+ "language_model.model.layers.28.mlp.down_proj": {
2622
  "group_size": 64,
2623
  "bits": 8
2624
  },
2625
+ "language_model.model.layers.28.mlp.up_proj": {
2626
  "group_size": 64,
2627
  "bits": 8
2628
  },
 
2654
  "group_size": 64,
2655
  "bits": 8
2656
  },
2657
+ "language_model.model.layers.29.mlp.down_proj": {
2658
  "group_size": 64,
2659
  "bits": 8
2660
  },
2661
+ "language_model.model.layers.29.mlp.up_proj": {
2662
  "group_size": 64,
2663
  "bits": 8
2664
  },
 
2690
  "group_size": 64,
2691
  "bits": 8
2692
  },
2693
+ "language_model.model.layers.30.mlp.down_proj": {
2694
  "group_size": 64,
2695
  "bits": 8
2696
  },
2697
+ "language_model.model.layers.30.mlp.up_proj": {
2698
  "group_size": 64,
2699
  "bits": 8
2700
  },
 
2726
  "group_size": 64,
2727
  "bits": 8
2728
  },
2729
+ "language_model.model.layers.31.mlp.down_proj": {
2730
  "group_size": 64,
2731
  "bits": 8
2732
  },
2733
+ "language_model.model.layers.31.mlp.up_proj": {
2734
  "group_size": 64,
2735
  "bits": 8
2736
  },
 
2762
  "group_size": 64,
2763
  "bits": 8
2764
  },
2765
+ "language_model.model.layers.32.mlp.down_proj": {
2766
  "group_size": 64,
2767
  "bits": 8
2768
  },
2769
+ "language_model.model.layers.32.mlp.up_proj": {
2770
  "group_size": 64,
2771
  "bits": 8
2772
  },
 
2798
  "group_size": 64,
2799
  "bits": 8
2800
  },
2801
+ "language_model.model.layers.33.mlp.down_proj": {
2802
  "group_size": 64,
2803
  "bits": 8
2804
  },
2805
+ "language_model.model.layers.33.mlp.up_proj": {
2806
  "group_size": 64,
2807
  "bits": 8
2808
  },
 
2834
  "group_size": 64,
2835
  "bits": 8
2836
  },
2837
+ "language_model.model.layers.34.mlp.down_proj": {
2838
  "group_size": 64,
2839
  "bits": 8
2840
  },
2841
+ "language_model.model.layers.34.mlp.up_proj": {
2842
  "group_size": 64,
2843
  "bits": 8
2844
  },
 
2870
  "group_size": 64,
2871
  "bits": 8
2872
  },
2873
+ "language_model.model.layers.35.mlp.down_proj": {
2874
  "group_size": 64,
2875
  "bits": 8
2876
  },
2877
+ "language_model.model.layers.35.mlp.up_proj": {
2878
  "group_size": 64,
2879
  "bits": 8
2880
  },
 
2906
  "group_size": 64,
2907
  "bits": 8
2908
  },
2909
+ "language_model.model.layers.36.mlp.down_proj": {
2910
  "group_size": 64,
2911
  "bits": 8
2912
  },
2913
+ "language_model.model.layers.36.mlp.up_proj": {
2914
  "group_size": 64,
2915
  "bits": 8
2916
  },
 
2942
  "group_size": 64,
2943
  "bits": 8
2944
  },
2945
+ "language_model.model.layers.37.mlp.down_proj": {
2946
  "group_size": 64,
2947
  "bits": 8
2948
  },
2949
+ "language_model.model.layers.37.mlp.up_proj": {
2950
  "group_size": 64,
2951
  "bits": 8
2952
  },
 
2978
  "group_size": 64,
2979
  "bits": 8
2980
  },
2981
+ "language_model.model.layers.38.mlp.down_proj": {
2982
  "group_size": 64,
2983
  "bits": 8
2984
  },
2985
+ "language_model.model.layers.38.mlp.up_proj": {
2986
  "group_size": 64,
2987
  "bits": 8
2988
  },
 
3014
  "group_size": 64,
3015
  "bits": 8
3016
  },
3017
+ "language_model.model.layers.39.mlp.down_proj": {
3018
  "group_size": 64,
3019
  "bits": 8
3020
  },
3021
+ "language_model.model.layers.39.mlp.up_proj": {
3022
  "group_size": 64,
3023
  "bits": 8
3024
  },
 
3050
  "group_size": 64,
3051
  "bits": 8
3052
  },
3053
+ "language_model.model.layers.40.mlp.down_proj": {
3054
  "group_size": 64,
3055
  "bits": 8
3056
  },
3057
+ "language_model.model.layers.40.mlp.up_proj": {
3058
  "group_size": 64,
3059
  "bits": 8
3060
  },
 
3086
  "group_size": 64,
3087
  "bits": 8
3088
  },
3089
+ "language_model.model.layers.41.mlp.down_proj": {
3090
  "group_size": 64,
3091
  "bits": 8
3092
  },
3093
+ "language_model.model.layers.41.mlp.up_proj": {
3094
  "group_size": 64,
3095
  "bits": 8
3096
  },
 
3105
  "language_model.model.embed_tokens_per_layer": {
3106
  "group_size": 64,
3107
  "bits": 8
 
 
 
 
3108
  }
3109
  },
3110
  "text_config": {
 
3201
  "tie_word_embeddings": true,
3202
  "transformers_version": "5.5.0.dev0",
3203
  "video_token_id": 258884,
3204
+ "vision_config": {
3205
+ "_name_or_path": "",
3206
+ "architectures": null,
3207
+ "attention_bias": false,
3208
+ "attention_dropout": 0.0,
3209
+ "chunk_size_feed_forward": 0,
3210
+ "default_output_length": 280,
3211
+ "dtype": "bfloat16",
3212
+ "global_head_dim": 64,
3213
+ "head_dim": 64,
3214
+ "hidden_activation": "gelu_pytorch_tanh",
3215
+ "hidden_size": 768,
3216
+ "id2label": {
3217
+ "0": "LABEL_0",
3218
+ "1": "LABEL_1"
3219
+ },
3220
+ "initializer_range": 0.02,
3221
+ "intermediate_size": 3072,
3222
+ "is_encoder_decoder": false,
3223
+ "label2id": {
3224
+ "LABEL_0": 0,
3225
+ "LABEL_1": 1
3226
+ },
3227
+ "max_position_embeddings": 131072,
3228
+ "model_type": "gemma4_vision",
3229
+ "num_attention_heads": 12,
3230
+ "num_hidden_layers": 16,
3231
+ "num_key_value_heads": 12,
3232
+ "output_attentions": false,
3233
+ "output_hidden_states": false,
3234
+ "patch_size": 16,
3235
+ "pooling_kernel_size": 3,
3236
+ "position_embedding_size": 10240,
3237
+ "problem_type": null,
3238
+ "return_dict": true,
3239
+ "rms_norm_eps": 1e-06,
3240
+ "rope_parameters": {
3241
+ "rope_theta": 100.0,
3242
+ "rope_type": "default"
3243
+ },
3244
+ "standardize": false,
3245
+ "use_clipped_linears": true
3246
+ },
3247
  "vision_soft_tokens_per_image": 280
3248
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14dce6f7bb00e6a2806aed8825b08c93a131d501b79ed2f845798647b41257ff
3
- size 4964690708
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba98954a0dbc93fb857edd85792ac03d986cac6e6d3e830b48cb4ea929ca6d76
3
+ size 4964690704
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fcd2f2cfcfb7cd025351071a80af947c9249c1a112b7ffc32163bfab9507f99
3
- size 3023979920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e952e5cc84eaea341fb8ce4bd10609751ac90ef2f6be673f9ecc6375cfc31c
3
+ size 4006137753
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_seq_length": 750,
3
+ "image_processor": {
4
+ "do_convert_rgb": true,
5
+ "do_normalize": false,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.0,
10
+ 0.0,
11
+ 0.0
12
+ ],
13
+ "image_processor_type": "Gemma4ImageProcessor",
14
+ "image_seq_length": 280,
15
+ "image_std": [
16
+ 1.0,
17
+ 1.0,
18
+ 1.0
19
+ ],
20
+ "max_soft_tokens": 280,
21
+ "patch_size": 16,
22
+ "pooling_kernel_size": 3,
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ },
30
+ "image_seq_length": 280,
31
+ "processor_class": "Gemma4Processor",
32
+ "feature_extractor": {
33
+ "feature_extractor_type": "Gemma4AudioFeatureExtractor",
34
+ "sampling_rate": 16000,
35
+ "num_mel_filters": 128,
36
+ "fft_length": 1024,
37
+ "hop_length": 160,
38
+ "chunk_duration": 8.0,
39
+ "overlap_duration": 1.0
40
+ },
41
+ "audio_ms_per_token": 40
42
+ }
tokenizer_config.json CHANGED
@@ -17,50 +17,71 @@
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
 
20
  "mask_token": "<mask>",
21
  "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "pad_token": "<pad>",
23
  "padding_side": "left",
24
  "processor_class": "Gemma4Processor",
25
  "response_schema": {
26
- "type": "object",
27
  "properties": {
 
 
 
28
  "role": {
29
  "const": "assistant"
30
  },
31
  "thinking": {
32
  "type": "string"
33
  },
34
- "content": {
35
- "type": "string"
36
- },
37
  "tool_calls": {
38
- "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
- "type": "array",
40
  "items": {
41
- "type": "object",
42
  "properties": {
43
- "type": {
44
- "const": "function"
45
- },
46
  "function": {
47
- "type": "object",
48
- "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
  "properties": {
50
- "name": {
51
- "type": "string"
52
- },
53
  "arguments": {
 
54
  "type": "object",
55
- "x-parser": "gemma4-tool-call",
56
- "additionalProperties": {}
 
 
57
  }
58
- }
 
 
 
 
 
59
  }
60
- }
61
- }
 
 
 
62
  }
63
  },
 
64
  "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
65
  },
66
  "soc_token": "<|channel>",
 
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
20
+ "is_local": true,
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
+ "model_specific_special_tokens": {
24
+ "audio_token": "<|audio|>",
25
+ "boa_token": "<|audio>",
26
+ "boi_token": "<|image>",
27
+ "eoa_token": "<audio|>",
28
+ "eoc_token": "<channel|>",
29
+ "eoi_token": "<image|>",
30
+ "eot_token": "<turn|>",
31
+ "escape_token": "<|\"|>",
32
+ "etc_token": "<tool_call|>",
33
+ "etd_token": "<tool|>",
34
+ "etr_token": "<tool_response|>",
35
+ "image_token": "<|image|>",
36
+ "soc_token": "<|channel>",
37
+ "sot_token": "<|turn>",
38
+ "stc_token": "<|tool_call>",
39
+ "std_token": "<|tool>",
40
+ "str_token": "<|tool_response>",
41
+ "think_token": "<|think|>"
42
+ },
43
  "pad_token": "<pad>",
44
  "padding_side": "left",
45
  "processor_class": "Gemma4Processor",
46
  "response_schema": {
 
47
  "properties": {
48
+ "content": {
49
+ "type": "string"
50
+ },
51
  "role": {
52
  "const": "assistant"
53
  },
54
  "thinking": {
55
  "type": "string"
56
  },
 
 
 
57
  "tool_calls": {
 
 
58
  "items": {
 
59
  "properties": {
 
 
 
60
  "function": {
 
 
61
  "properties": {
 
 
 
62
  "arguments": {
63
+ "additionalProperties": {},
64
  "type": "object",
65
+ "x-parser": "gemma4-tool-call"
66
+ },
67
+ "name": {
68
+ "type": "string"
69
  }
70
+ },
71
+ "type": "object",
72
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
73
+ },
74
+ "type": {
75
+ "const": "function"
76
  }
77
+ },
78
+ "type": "object"
79
+ },
80
+ "type": "array",
81
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
82
  }
83
  },
84
+ "type": "object",
85
  "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
86
  },
87
  "soc_token": "<|channel>",