sudo-0x2a committed
Commit 25a2ef3 · 1 parent: 7ea446a

Upload folder using huggingface_hub
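The commit message indicates the folder was pushed with huggingface_hub. A minimal sketch of what that upload call looks like (the local path is hypothetical; the repo id is taken from the README diff below):

```python
# Sketch of the upload this commit records, assuming huggingface_hub's HfApi.
from huggingface_hub import HfApi

HfApi().upload_folder(
    folder_path="./Qwen3-14B-4bit-AWQ",          # local MLX model folder (assumed name)
    repo_id="mlx-community/Qwen3-14B-4bit-AWQ",  # target repo, per the README below
    commit_message="Upload folder using huggingface_hub",
)
```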
Files changed (5)
  1. .DS_Store +0 -0
  2. README.md +0 -28
  3. config.json +280 -280
  4. model.safetensors.index.json +32 -32
  5. tokenizer_config.json +3 -1
.DS_Store ADDED
Binary file (6.15 kB).
 
README.md CHANGED
@@ -7,31 +7,3 @@ base_model: Qwen/Qwen3-14B
  tags:
  - mlx
  ---
-
- # mlx-community/Qwen3-14B-4bit-AWQ
-
- This model [mlx-community/Qwen3-14B-4bit-AWQ](https://huggingface.co/mlx-community/Qwen3-14B-AWQ-MLX) was
- converted to MLX format from [Qwen/Qwen3-14B](https://huggingface.co/Qwen/Qwen3-14B)
- using mlx-lm version **0.24.0**.
-
- ## Use with mlx
-
- ```bash
- pip install mlx-lm
- ```
-
- ```python
- from mlx_lm import load, generate
-
- model, tokenizer = load("mlx-community/Qwen3-14B-4bit-AWQ")
-
- prompt = "hello"
-
- if tokenizer.chat_template is not None:
-     messages = [{"role": "user", "content": prompt}]
-     prompt = tokenizer.apply_chat_template(
-         messages, add_generation_prompt=True
-     )
-
- response = generate(model, tokenizer, prompt=prompt, verbose=True)
- ```

config.json CHANGED
@@ -25,1321 +25,1321 @@
    "bits": 4
  },
  "model.layers.0.self_attn.q_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.self_attn.k_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.self_attn.v_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.self_attn.o_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.self_attn.q_norm": false,
  "model.layers.0.self_attn.k_norm": false,
  "model.layers.0.self_attn.rope": false,
  "model.layers.0.mlp.gate_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.mlp.down_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.mlp.up_proj": {
-   "group_size": 128,
+   "group_size": 64,
    "bits": 4
  },
  "model.layers.0.input_layernorm": false,
  "model.layers.0.post_attention_layernorm": false,

[The same "group_size": 128 → 64 change repeats for the seven quantized projections (q_proj, k_proj, v_proj, o_proj, gate_proj, down_proj, up_proj) of every remaining layer, model.layers.1 through model.layers.39 — 280 changed lines in total, matching the +280/-280 stat above.]
890
+ "group_size": 64,
891
  "bits": 4
892
  },
893
  "model.layers.26.self_attn.v_proj": {
894
+ "group_size": 64,
895
  "bits": 4
896
  },
897
  "model.layers.26.self_attn.o_proj": {
898
+ "group_size": 64,
899
  "bits": 4
900
  },
901
  "model.layers.26.self_attn.q_norm": false,
902
  "model.layers.26.self_attn.k_norm": false,
903
  "model.layers.26.self_attn.rope": false,
904
  "model.layers.26.mlp.gate_proj": {
905
+ "group_size": 64,
906
  "bits": 4
907
  },
908
  "model.layers.26.mlp.down_proj": {
909
+ "group_size": 64,
910
  "bits": 4
911
  },
912
  "model.layers.26.mlp.up_proj": {
913
+ "group_size": 64,
914
  "bits": 4
915
  },
916
  "model.layers.26.input_layernorm": false,
917
  "model.layers.26.post_attention_layernorm": false,
918
  "model.layers.27.self_attn.q_proj": {
919
+ "group_size": 64,
920
  "bits": 4
921
  },
922
  "model.layers.27.self_attn.k_proj": {
923
+ "group_size": 64,
924
  "bits": 4
925
  },
926
  "model.layers.27.self_attn.v_proj": {
927
+ "group_size": 64,
928
  "bits": 4
929
  },
930
  "model.layers.27.self_attn.o_proj": {
931
+ "group_size": 64,
932
  "bits": 4
933
  },
934
  "model.layers.27.self_attn.q_norm": false,
935
  "model.layers.27.self_attn.k_norm": false,
936
  "model.layers.27.self_attn.rope": false,
937
  "model.layers.27.mlp.gate_proj": {
938
+ "group_size": 64,
939
  "bits": 4
940
  },
941
  "model.layers.27.mlp.down_proj": {
942
+ "group_size": 64,
943
  "bits": 4
944
  },
945
  "model.layers.27.mlp.up_proj": {
946
+ "group_size": 64,
947
  "bits": 4
948
  },
949
  "model.layers.27.input_layernorm": false,
950
  "model.layers.27.post_attention_layernorm": false,
951
  "model.layers.28.self_attn.q_proj": {
952
+ "group_size": 64,
953
  "bits": 4
954
  },
955
  "model.layers.28.self_attn.k_proj": {
956
+ "group_size": 64,
957
  "bits": 4
958
  },
959
  "model.layers.28.self_attn.v_proj": {
960
+ "group_size": 64,
961
  "bits": 4
962
  },
963
  "model.layers.28.self_attn.o_proj": {
964
+ "group_size": 64,
965
  "bits": 4
966
  },
967
  "model.layers.28.self_attn.q_norm": false,
968
  "model.layers.28.self_attn.k_norm": false,
969
  "model.layers.28.self_attn.rope": false,
970
  "model.layers.28.mlp.gate_proj": {
971
+ "group_size": 64,
972
  "bits": 4
973
  },
974
  "model.layers.28.mlp.down_proj": {
975
+ "group_size": 64,
976
  "bits": 4
977
  },
978
  "model.layers.28.mlp.up_proj": {
979
+ "group_size": 64,
980
  "bits": 4
981
  },
982
  "model.layers.28.input_layernorm": false,
983
  "model.layers.28.post_attention_layernorm": false,
984
  "model.layers.29.self_attn.q_proj": {
985
+ "group_size": 64,
986
  "bits": 4
987
  },
988
  "model.layers.29.self_attn.k_proj": {
989
+ "group_size": 64,
990
  "bits": 4
991
  },
992
  "model.layers.29.self_attn.v_proj": {
993
+ "group_size": 64,
994
  "bits": 4
995
  },
996
  "model.layers.29.self_attn.o_proj": {
997
+ "group_size": 64,
998
  "bits": 4
999
  },
1000
  "model.layers.29.self_attn.q_norm": false,
1001
  "model.layers.29.self_attn.k_norm": false,
1002
  "model.layers.29.self_attn.rope": false,
1003
  "model.layers.29.mlp.gate_proj": {
1004
+ "group_size": 64,
1005
  "bits": 4
1006
  },
1007
  "model.layers.29.mlp.down_proj": {
1008
+ "group_size": 64,
1009
  "bits": 4
1010
  },
1011
  "model.layers.29.mlp.up_proj": {
1012
+ "group_size": 64,
1013
  "bits": 4
1014
  },
1015
  "model.layers.29.input_layernorm": false,
1016
  "model.layers.29.post_attention_layernorm": false,
1017
  "model.layers.30.self_attn.q_proj": {
1018
+ "group_size": 64,
1019
  "bits": 4
1020
  },
1021
  "model.layers.30.self_attn.k_proj": {
1022
+ "group_size": 64,
1023
  "bits": 4
1024
  },
1025
  "model.layers.30.self_attn.v_proj": {
1026
+ "group_size": 64,
1027
  "bits": 4
1028
  },
1029
  "model.layers.30.self_attn.o_proj": {
1030
+ "group_size": 64,
1031
  "bits": 4
1032
  },
1033
  "model.layers.30.self_attn.q_norm": false,
1034
  "model.layers.30.self_attn.k_norm": false,
1035
  "model.layers.30.self_attn.rope": false,
1036
  "model.layers.30.mlp.gate_proj": {
1037
+ "group_size": 64,
1038
  "bits": 4
1039
  },
1040
  "model.layers.30.mlp.down_proj": {
1041
+ "group_size": 64,
1042
  "bits": 4
1043
  },
1044
  "model.layers.30.mlp.up_proj": {
1045
+ "group_size": 64,
1046
  "bits": 4
1047
  },
1048
  "model.layers.30.input_layernorm": false,
1049
  "model.layers.30.post_attention_layernorm": false,
1050
  "model.layers.31.self_attn.q_proj": {
1051
+ "group_size": 64,
1052
  "bits": 4
1053
  },
1054
  "model.layers.31.self_attn.k_proj": {
1055
+ "group_size": 64,
1056
  "bits": 4
1057
  },
1058
  "model.layers.31.self_attn.v_proj": {
1059
+ "group_size": 64,
1060
  "bits": 4
1061
  },
1062
  "model.layers.31.self_attn.o_proj": {
1063
+ "group_size": 64,
1064
  "bits": 4
1065
  },
1066
  "model.layers.31.self_attn.q_norm": false,
1067
  "model.layers.31.self_attn.k_norm": false,
1068
  "model.layers.31.self_attn.rope": false,
1069
  "model.layers.31.mlp.gate_proj": {
1070
+ "group_size": 64,
1071
  "bits": 4
1072
  },
1073
  "model.layers.31.mlp.down_proj": {
1074
+ "group_size": 64,
1075
  "bits": 4
1076
  },
1077
  "model.layers.31.mlp.up_proj": {
1078
+ "group_size": 64,
1079
  "bits": 4
1080
  },
1081
  "model.layers.31.input_layernorm": false,
1082
  "model.layers.31.post_attention_layernorm": false,
1083
  "model.layers.32.self_attn.q_proj": {
1084
+ "group_size": 64,
1085
  "bits": 4
1086
  },
1087
  "model.layers.32.self_attn.k_proj": {
1088
+ "group_size": 64,
1089
  "bits": 4
1090
  },
1091
  "model.layers.32.self_attn.v_proj": {
1092
+ "group_size": 64,
1093
  "bits": 4
1094
  },
1095
  "model.layers.32.self_attn.o_proj": {
1096
+ "group_size": 64,
1097
  "bits": 4
1098
  },
1099
  "model.layers.32.self_attn.q_norm": false,
1100
  "model.layers.32.self_attn.k_norm": false,
1101
  "model.layers.32.self_attn.rope": false,
1102
  "model.layers.32.mlp.gate_proj": {
1103
+ "group_size": 64,
1104
  "bits": 4
1105
  },
1106
  "model.layers.32.mlp.down_proj": {
1107
+ "group_size": 64,
1108
  "bits": 4
1109
  },
1110
  "model.layers.32.mlp.up_proj": {
1111
+ "group_size": 64,
1112
  "bits": 4
1113
  },
1114
  "model.layers.32.input_layernorm": false,
1115
  "model.layers.32.post_attention_layernorm": false,
1116
  "model.layers.33.self_attn.q_proj": {
1117
+ "group_size": 64,
1118
  "bits": 4
1119
  },
1120
  "model.layers.33.self_attn.k_proj": {
1121
+ "group_size": 64,
1122
  "bits": 4
1123
  },
1124
  "model.layers.33.self_attn.v_proj": {
1125
+ "group_size": 64,
1126
  "bits": 4
1127
  },
1128
  "model.layers.33.self_attn.o_proj": {
1129
+ "group_size": 64,
1130
  "bits": 4
1131
  },
1132
  "model.layers.33.self_attn.q_norm": false,
1133
  "model.layers.33.self_attn.k_norm": false,
1134
  "model.layers.33.self_attn.rope": false,
1135
  "model.layers.33.mlp.gate_proj": {
1136
+ "group_size": 64,
1137
  "bits": 4
1138
  },
1139
  "model.layers.33.mlp.down_proj": {
1140
+ "group_size": 64,
1141
  "bits": 4
1142
  },
1143
  "model.layers.33.mlp.up_proj": {
1144
+ "group_size": 64,
1145
  "bits": 4
1146
  },
1147
  "model.layers.33.input_layernorm": false,
1148
  "model.layers.33.post_attention_layernorm": false,
1149
  "model.layers.34.self_attn.q_proj": {
1150
+ "group_size": 64,
1151
  "bits": 4
1152
  },
1153
  "model.layers.34.self_attn.k_proj": {
1154
+ "group_size": 64,
1155
  "bits": 4
1156
  },
1157
  "model.layers.34.self_attn.v_proj": {
1158
+ "group_size": 64,
1159
  "bits": 4
1160
  },
1161
  "model.layers.34.self_attn.o_proj": {
1162
+ "group_size": 64,
1163
  "bits": 4
1164
  },
1165
  "model.layers.34.self_attn.q_norm": false,
1166
  "model.layers.34.self_attn.k_norm": false,
1167
  "model.layers.34.self_attn.rope": false,
1168
  "model.layers.34.mlp.gate_proj": {
1169
+ "group_size": 64,
1170
  "bits": 4
1171
  },
1172
  "model.layers.34.mlp.down_proj": {
1173
+ "group_size": 64,
1174
  "bits": 4
1175
  },
1176
  "model.layers.34.mlp.up_proj": {
1177
+ "group_size": 64,
1178
  "bits": 4
1179
  },
1180
  "model.layers.34.input_layernorm": false,
1181
  "model.layers.34.post_attention_layernorm": false,
1182
  "model.layers.35.self_attn.q_proj": {
1183
+ "group_size": 64,
1184
  "bits": 4
1185
  },
1186
  "model.layers.35.self_attn.k_proj": {
1187
+ "group_size": 64,
1188
  "bits": 4
1189
  },
1190
  "model.layers.35.self_attn.v_proj": {
1191
+ "group_size": 64,
1192
  "bits": 4
1193
  },
1194
  "model.layers.35.self_attn.o_proj": {
1195
+ "group_size": 64,
1196
  "bits": 4
1197
  },
1198
  "model.layers.35.self_attn.q_norm": false,
1199
  "model.layers.35.self_attn.k_norm": false,
1200
  "model.layers.35.self_attn.rope": false,
1201
  "model.layers.35.mlp.gate_proj": {
1202
+ "group_size": 64,
1203
  "bits": 4
1204
  },
1205
  "model.layers.35.mlp.down_proj": {
1206
+ "group_size": 64,
1207
  "bits": 4
1208
  },
1209
  "model.layers.35.mlp.up_proj": {
1210
+ "group_size": 64,
1211
  "bits": 4
1212
  },
1213
  "model.layers.35.input_layernorm": false,
1214
  "model.layers.35.post_attention_layernorm": false,
1215
  "model.layers.36.self_attn.q_proj": {
1216
+ "group_size": 64,
1217
  "bits": 4
1218
  },
1219
  "model.layers.36.self_attn.k_proj": {
1220
+ "group_size": 64,
1221
  "bits": 4
1222
  },
1223
  "model.layers.36.self_attn.v_proj": {
1224
+ "group_size": 64,
1225
  "bits": 4
1226
  },
1227
  "model.layers.36.self_attn.o_proj": {
1228
+ "group_size": 64,
1229
  "bits": 4
1230
  },
1231
  "model.layers.36.self_attn.q_norm": false,
1232
  "model.layers.36.self_attn.k_norm": false,
1233
  "model.layers.36.self_attn.rope": false,
1234
  "model.layers.36.mlp.gate_proj": {
1235
+ "group_size": 64,
1236
  "bits": 4
1237
  },
1238
  "model.layers.36.mlp.down_proj": {
1239
+ "group_size": 64,
1240
  "bits": 4
1241
  },
1242
  "model.layers.36.mlp.up_proj": {
1243
+ "group_size": 64,
1244
  "bits": 4
1245
  },
1246
  "model.layers.36.input_layernorm": false,
1247
  "model.layers.36.post_attention_layernorm": false,
1248
  "model.layers.37.self_attn.q_proj": {
1249
+ "group_size": 64,
1250
  "bits": 4
1251
  },
1252
  "model.layers.37.self_attn.k_proj": {
1253
+ "group_size": 64,
1254
  "bits": 4
1255
  },
1256
  "model.layers.37.self_attn.v_proj": {
1257
+ "group_size": 64,
1258
  "bits": 4
1259
  },
1260
  "model.layers.37.self_attn.o_proj": {
1261
+ "group_size": 64,
1262
  "bits": 4
1263
  },
1264
  "model.layers.37.self_attn.q_norm": false,
1265
  "model.layers.37.self_attn.k_norm": false,
1266
  "model.layers.37.self_attn.rope": false,
1267
  "model.layers.37.mlp.gate_proj": {
1268
+ "group_size": 64,
1269
  "bits": 4
1270
  },
1271
  "model.layers.37.mlp.down_proj": {
1272
+ "group_size": 64,
1273
  "bits": 4
1274
  },
1275
  "model.layers.37.mlp.up_proj": {
1276
+ "group_size": 64,
1277
  "bits": 4
1278
  },
1279
  "model.layers.37.input_layernorm": false,
1280
  "model.layers.37.post_attention_layernorm": false,
1281
  "model.layers.38.self_attn.q_proj": {
1282
+ "group_size": 64,
1283
  "bits": 4
1284
  },
1285
  "model.layers.38.self_attn.k_proj": {
1286
+ "group_size": 64,
1287
  "bits": 4
1288
  },
1289
  "model.layers.38.self_attn.v_proj": {
1290
+ "group_size": 64,
1291
  "bits": 4
1292
  },
1293
  "model.layers.38.self_attn.o_proj": {
1294
+ "group_size": 64,
1295
  "bits": 4
1296
  },
1297
  "model.layers.38.self_attn.q_norm": false,
1298
  "model.layers.38.self_attn.k_norm": false,
1299
  "model.layers.38.self_attn.rope": false,
1300
  "model.layers.38.mlp.gate_proj": {
1301
+ "group_size": 64,
1302
  "bits": 4
1303
  },
1304
  "model.layers.38.mlp.down_proj": {
1305
+ "group_size": 64,
1306
  "bits": 4
1307
  },
1308
  "model.layers.38.mlp.up_proj": {
1309
+ "group_size": 64,
1310
  "bits": 4
1311
  },
1312
  "model.layers.38.input_layernorm": false,
1313
  "model.layers.38.post_attention_layernorm": false,
1314
  "model.layers.39.self_attn.q_proj": {
1315
+ "group_size": 64,
1316
  "bits": 4
1317
  },
1318
  "model.layers.39.self_attn.k_proj": {
1319
+ "group_size": 64,
1320
  "bits": 4
1321
  },
1322
  "model.layers.39.self_attn.v_proj": {
1323
+ "group_size": 64,
1324
  "bits": 4
1325
  },
1326
  "model.layers.39.self_attn.o_proj": {
1327
+ "group_size": 64,
1328
  "bits": 4
1329
  },
1330
  "model.layers.39.self_attn.q_norm": false,
1331
  "model.layers.39.self_attn.k_norm": false,
1332
  "model.layers.39.self_attn.rope": false,
1333
  "model.layers.39.mlp.gate_proj": {
1334
+ "group_size": 64,
1335
  "bits": 4
1336
  },
1337
  "model.layers.39.mlp.down_proj": {
1338
+ "group_size": 64,
1339
  "bits": 4
1340
  },
1341
  "model.layers.39.mlp.up_proj": {
1342
+ "group_size": 64,
1343
  "bits": 4
1344
  },
1345
  "model.layers.39.input_layernorm": false,
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 7992145920
+ "total_size": 8405022720
  },
  "weight_map": {
  "lm_head.biases": "model-00002-of-00002.safetensors",
@@ -484,17 +484,17 @@
  "model.layers.25.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
  "model.layers.25.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
  "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
  "model.layers.26.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
  "model.layers.26.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
  "model.layers.26.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
@@ -510,30 +510,30 @@
  "model.layers.26.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
  "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
  "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
- "model.layers.27.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.27.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.27.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.27.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
  "model.layers.27.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
  "model.layers.27.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
  "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
  "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
- "model.layers.27.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.k_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.o_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.q_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.v_proj.biases": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
  "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
  "model.layers.28.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
  "model.layers.28.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
tokenizer_config.json CHANGED
@@ -227,7 +227,9 @@
  "<|video_pad|>"
  ],
  "bos_token": null,
- "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- set tool_start = \"<tool_response>\" %}\n    {%- set tool_start_length = tool_start|length %}\n    {%- set start_of_message = message.content[:tool_start_length] %}\n    {%- set tool_end = \"</tool_response>\" %}\n    {%- set tool_end_length = tool_end|length %}\n    {%- set start_pos = (message.content|length) - tool_end_length %}\n    {%- if start_pos < 0 %}\n        {%- set start_pos = 0 %}\n    {%- endif %}\n    {%- set end_of_message = message.content[start_pos:] %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(start_of_message == tool_start and end_of_message == tool_end) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in message.content %}\n                {%- set content = (message.content.split('</think>')|last).lstrip('\\n') %}\n                {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\\n') %}\n                {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}", "clean_up_tokenization_spaces": false,
+ "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for forward_message in messages %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- set message = messages[index] %}\n    {%- set current_content = message.content if message.content is not none else '' %}\n    {%- set tool_start = '<tool_response>' %}\n    {%- set tool_start_length = tool_start|length %}\n    {%- set start_of_message = current_content[:tool_start_length] %}\n    {%- set tool_end = '</tool_response>' %}\n    {%- set tool_end_length = tool_end|length %}\n    {%- set start_pos = (current_content|length) - tool_end_length %}\n    {%- if start_pos < 0 %}\n        {%- set start_pos = 0 %}\n    {%- endif %}\n    {%- set end_of_message = current_content[start_pos:] %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(start_of_message == tool_start and end_of_message == tool_end) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in message.content %}\n                {%- set content = (message.content.split('</think>')|last).lstrip('\\n') %}\n                {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\\n') %}\n                {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
+
+ "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": {},