sudo-0x2a committed
Commit 0eaf39b · verified · parent: 59758a6

Add files using upload-large-folder tool
README.md CHANGED
@@ -1,20 +1,18 @@
 ---
-library_name: mlx
-license: apache-2.0
-license_link: https://huggingface.co/Qwen/Qwen3-14B/blob/main/LICENSE
-pipeline_tag: text-generation
-base_model: Qwen/Qwen3-14B
 tags:
+- unsloth
 - mlx
+base_model: unsloth/Qwen3-14B
+license: apache-2.0
+pipeline_tag: text-generation
+library_name: mlx
 ---
 
 # mlx-community/Qwen3-14B-4bit-AWQ
 
 This model [mlx-community/Qwen3-14B-4bit-AWQ](https://huggingface.co/mlx-community/Qwen3-14B-4bit-AWQ) was
-converted to MLX format from [Qwen/Qwen3-14B](https://huggingface.co/Qwen/Qwen3-14B)
-using mlx-lm version **0.25.2**.
-
-AWQ Parameters: --bits 4 --group-size 64 --embed-bits 4 --embed-group-size 32 --num-samples 256 --sequence-length 1024 --n-grid 50
+converted to MLX format from [unsloth/Qwen3-14B](https://huggingface.co/unsloth/Qwen3-14B)
+using mlx-lm version **0.26.3**.
 
 ## Use with mlx
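The "Use with mlx" section the diff ends on normally carries the standard mlx-lm snippet. As a sketch of that flow (repo id taken from this commit; assumes `pip install mlx-lm` on Apple Silicon):

```python
from mlx_lm import load, generate

# Download the quantized weights and tokenizer from the Hub.
model, tokenizer = load("mlx-community/Qwen3-14B-4bit-AWQ")

# Build a chat prompt using the model's chat template.
messages = [{"role": "user", "content": "Explain AWQ in one sentence."}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Generate a completion.
text = generate(model, tokenizer, prompt=prompt, verbose=True)
```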
chat_template.jinja CHANGED
@@ -18,16 +18,17 @@
 {%- for forward_message in messages %}
     {%- set index = (messages|length - 1) - loop.index0 %}
     {%- set message = messages[index] %}
+    {%- set current_content = message.content if message.content is not none else '' %}
     {%- set tool_start = '<tool_response>' %}
     {%- set tool_start_length = tool_start|length %}
-    {%- set start_of_message = message.content[:tool_start_length] %}
+    {%- set start_of_message = current_content[:tool_start_length] %}
     {%- set tool_end = '</tool_response>' %}
     {%- set tool_end_length = tool_end|length %}
-    {%- set start_pos = (message.content|length) - tool_end_length %}
+    {%- set start_pos = (current_content|length) - tool_end_length %}
     {%- if start_pos < 0 %}
         {%- set start_pos = 0 %}
     {%- endif %}
-    {%- set end_of_message = message.content[start_pos:] %}
+    {%- set end_of_message = current_content[start_pos:] %}
     {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
         {%- set ns.multi_step_tool = false %}
         {%- set ns.last_query_index = index %}
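The template change above guards against `message.content` being `None`, which is common for assistant turns that carry only `tool_calls`; slicing `None` raises in Jinja exactly as it would in Python. A minimal Python sketch of the same guard (the message dicts are illustrative):

```python
def is_tool_response(message: dict) -> bool:
    tool_start, tool_end = "<tool_response>", "</tool_response>"
    # The fix: fall back to '' when content is None before slicing.
    content = message.get("content") or ""
    start_of_message = content[: len(tool_start)]
    start_pos = max(len(content) - len(tool_end), 0)
    end_of_message = content[start_pos:]
    return start_of_message == tool_start and end_of_message == tool_end

# An assistant turn with only tool_calls has no text content; this no longer crashes:
print(is_tool_response({"role": "assistant", "content": None}))                      # False
print(is_tool_response({"role": "user", "content": "<tool_response>\n42\n</tool_response>"}))  # True
```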
config.json CHANGED
@@ -4,7 +4,6 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 151643,
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -17,6 +16,7 @@
   "num_attention_heads": 40,
   "num_hidden_layers": 40,
   "num_key_value_heads": 8,
+  "pad_token_id": 151654,
   "quantization": {
     "group_size": 64,
     "bits": 4,
@@ -1350,13 +1350,1347 @@
   "bits": 4
   }
  },
+ "quantization_config": {
+ "group_size": 64,
+ "bits": 4,
+ "model.embed_tokens": {
+ "group_size": 32,
+ "bits": 4
+ },
+ "model.layers.0.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.self_attn.q_norm": false,
+ "model.layers.0.self_attn.k_norm": false,
+ "model.layers.0.self_attn.rope": false,
+ "model.layers.0.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.0.input_layernorm": false,
+ "model.layers.0.post_attention_layernorm": false,
+ "model.layers.1.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.self_attn.q_norm": false,
+ "model.layers.1.self_attn.k_norm": false,
+ "model.layers.1.self_attn.rope": false,
+ "model.layers.1.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.1.input_layernorm": false,
+ "model.layers.1.post_attention_layernorm": false,
+ "model.layers.2.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.self_attn.q_norm": false,
+ "model.layers.2.self_attn.k_norm": false,
+ "model.layers.2.self_attn.rope": false,
+ "model.layers.2.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.2.input_layernorm": false,
+ "model.layers.2.post_attention_layernorm": false,
+ "model.layers.3.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.self_attn.q_norm": false,
+ "model.layers.3.self_attn.k_norm": false,
+ "model.layers.3.self_attn.rope": false,
+ "model.layers.3.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.3.input_layernorm": false,
+ "model.layers.3.post_attention_layernorm": false,
+ "model.layers.4.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.self_attn.q_norm": false,
+ "model.layers.4.self_attn.k_norm": false,
+ "model.layers.4.self_attn.rope": false,
+ "model.layers.4.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.4.input_layernorm": false,
+ "model.layers.4.post_attention_layernorm": false,
+ "model.layers.5.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.self_attn.q_norm": false,
+ "model.layers.5.self_attn.k_norm": false,
+ "model.layers.5.self_attn.rope": false,
+ "model.layers.5.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.5.input_layernorm": false,
+ "model.layers.5.post_attention_layernorm": false,
+ "model.layers.6.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.self_attn.q_norm": false,
+ "model.layers.6.self_attn.k_norm": false,
+ "model.layers.6.self_attn.rope": false,
+ "model.layers.6.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.6.input_layernorm": false,
+ "model.layers.6.post_attention_layernorm": false,
+ "model.layers.7.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.self_attn.q_norm": false,
+ "model.layers.7.self_attn.k_norm": false,
+ "model.layers.7.self_attn.rope": false,
+ "model.layers.7.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.7.input_layernorm": false,
+ "model.layers.7.post_attention_layernorm": false,
+ "model.layers.8.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.self_attn.q_norm": false,
+ "model.layers.8.self_attn.k_norm": false,
+ "model.layers.8.self_attn.rope": false,
+ "model.layers.8.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.8.input_layernorm": false,
+ "model.layers.8.post_attention_layernorm": false,
+ "model.layers.9.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.self_attn.q_norm": false,
+ "model.layers.9.self_attn.k_norm": false,
+ "model.layers.9.self_attn.rope": false,
+ "model.layers.9.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.9.input_layernorm": false,
+ "model.layers.9.post_attention_layernorm": false,
+ "model.layers.10.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.self_attn.q_norm": false,
+ "model.layers.10.self_attn.k_norm": false,
+ "model.layers.10.self_attn.rope": false,
+ "model.layers.10.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.10.input_layernorm": false,
+ "model.layers.10.post_attention_layernorm": false,
+ "model.layers.11.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.self_attn.q_norm": false,
+ "model.layers.11.self_attn.k_norm": false,
+ "model.layers.11.self_attn.rope": false,
+ "model.layers.11.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.11.input_layernorm": false,
+ "model.layers.11.post_attention_layernorm": false,
+ "model.layers.12.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.self_attn.q_norm": false,
+ "model.layers.12.self_attn.k_norm": false,
+ "model.layers.12.self_attn.rope": false,
+ "model.layers.12.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.12.input_layernorm": false,
+ "model.layers.12.post_attention_layernorm": false,
+ "model.layers.13.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.self_attn.q_norm": false,
+ "model.layers.13.self_attn.k_norm": false,
+ "model.layers.13.self_attn.rope": false,
+ "model.layers.13.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.13.input_layernorm": false,
+ "model.layers.13.post_attention_layernorm": false,
+ "model.layers.14.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.self_attn.q_norm": false,
+ "model.layers.14.self_attn.k_norm": false,
+ "model.layers.14.self_attn.rope": false,
+ "model.layers.14.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.14.input_layernorm": false,
+ "model.layers.14.post_attention_layernorm": false,
+ "model.layers.15.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.self_attn.q_norm": false,
+ "model.layers.15.self_attn.k_norm": false,
+ "model.layers.15.self_attn.rope": false,
+ "model.layers.15.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.15.input_layernorm": false,
+ "model.layers.15.post_attention_layernorm": false,
+ "model.layers.16.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.self_attn.q_norm": false,
+ "model.layers.16.self_attn.k_norm": false,
+ "model.layers.16.self_attn.rope": false,
+ "model.layers.16.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.16.input_layernorm": false,
+ "model.layers.16.post_attention_layernorm": false,
+ "model.layers.17.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.self_attn.q_norm": false,
+ "model.layers.17.self_attn.k_norm": false,
+ "model.layers.17.self_attn.rope": false,
+ "model.layers.17.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.17.input_layernorm": false,
+ "model.layers.17.post_attention_layernorm": false,
+ "model.layers.18.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.self_attn.q_norm": false,
+ "model.layers.18.self_attn.k_norm": false,
+ "model.layers.18.self_attn.rope": false,
+ "model.layers.18.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.18.input_layernorm": false,
+ "model.layers.18.post_attention_layernorm": false,
+ "model.layers.19.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.self_attn.q_norm": false,
+ "model.layers.19.self_attn.k_norm": false,
+ "model.layers.19.self_attn.rope": false,
+ "model.layers.19.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.19.input_layernorm": false,
+ "model.layers.19.post_attention_layernorm": false,
+ "model.layers.20.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.self_attn.q_norm": false,
+ "model.layers.20.self_attn.k_norm": false,
+ "model.layers.20.self_attn.rope": false,
+ "model.layers.20.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.20.input_layernorm": false,
+ "model.layers.20.post_attention_layernorm": false,
+ "model.layers.21.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.self_attn.q_norm": false,
+ "model.layers.21.self_attn.k_norm": false,
+ "model.layers.21.self_attn.rope": false,
+ "model.layers.21.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.21.input_layernorm": false,
+ "model.layers.21.post_attention_layernorm": false,
+ "model.layers.22.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.self_attn.q_norm": false,
+ "model.layers.22.self_attn.k_norm": false,
+ "model.layers.22.self_attn.rope": false,
+ "model.layers.22.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.22.input_layernorm": false,
+ "model.layers.22.post_attention_layernorm": false,
+ "model.layers.23.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.self_attn.q_norm": false,
+ "model.layers.23.self_attn.k_norm": false,
+ "model.layers.23.self_attn.rope": false,
+ "model.layers.23.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.23.input_layernorm": false,
+ "model.layers.23.post_attention_layernorm": false,
+ "model.layers.24.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.self_attn.q_norm": false,
+ "model.layers.24.self_attn.k_norm": false,
+ "model.layers.24.self_attn.rope": false,
+ "model.layers.24.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.24.input_layernorm": false,
+ "model.layers.24.post_attention_layernorm": false,
+ "model.layers.25.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.self_attn.q_norm": false,
+ "model.layers.25.self_attn.k_norm": false,
+ "model.layers.25.self_attn.rope": false,
+ "model.layers.25.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.25.input_layernorm": false,
+ "model.layers.25.post_attention_layernorm": false,
+ "model.layers.26.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.self_attn.q_norm": false,
+ "model.layers.26.self_attn.k_norm": false,
+ "model.layers.26.self_attn.rope": false,
+ "model.layers.26.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.26.input_layernorm": false,
+ "model.layers.26.post_attention_layernorm": false,
+ "model.layers.27.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.self_attn.q_norm": false,
+ "model.layers.27.self_attn.k_norm": false,
+ "model.layers.27.self_attn.rope": false,
+ "model.layers.27.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.27.input_layernorm": false,
+ "model.layers.27.post_attention_layernorm": false,
+ "model.layers.28.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.self_attn.q_norm": false,
+ "model.layers.28.self_attn.k_norm": false,
+ "model.layers.28.self_attn.rope": false,
+ "model.layers.28.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.28.input_layernorm": false,
+ "model.layers.28.post_attention_layernorm": false,
+ "model.layers.29.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.self_attn.q_norm": false,
+ "model.layers.29.self_attn.k_norm": false,
+ "model.layers.29.self_attn.rope": false,
+ "model.layers.29.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.29.input_layernorm": false,
+ "model.layers.29.post_attention_layernorm": false,
+ "model.layers.30.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.self_attn.q_norm": false,
+ "model.layers.30.self_attn.k_norm": false,
+ "model.layers.30.self_attn.rope": false,
+ "model.layers.30.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.30.input_layernorm": false,
+ "model.layers.30.post_attention_layernorm": false,
+ "model.layers.31.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.self_attn.q_norm": false,
+ "model.layers.31.self_attn.k_norm": false,
+ "model.layers.31.self_attn.rope": false,
+ "model.layers.31.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.31.input_layernorm": false,
+ "model.layers.31.post_attention_layernorm": false,
+ "model.layers.32.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.self_attn.q_norm": false,
+ "model.layers.32.self_attn.k_norm": false,
+ "model.layers.32.self_attn.rope": false,
+ "model.layers.32.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.32.input_layernorm": false,
+ "model.layers.32.post_attention_layernorm": false,
+ "model.layers.33.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.self_attn.q_norm": false,
+ "model.layers.33.self_attn.k_norm": false,
+ "model.layers.33.self_attn.rope": false,
+ "model.layers.33.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.33.input_layernorm": false,
+ "model.layers.33.post_attention_layernorm": false,
+ "model.layers.34.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.self_attn.q_norm": false,
+ "model.layers.34.self_attn.k_norm": false,
+ "model.layers.34.self_attn.rope": false,
+ "model.layers.34.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.34.input_layernorm": false,
+ "model.layers.34.post_attention_layernorm": false,
+ "model.layers.35.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.self_attn.q_norm": false,
+ "model.layers.35.self_attn.k_norm": false,
+ "model.layers.35.self_attn.rope": false,
+ "model.layers.35.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.35.input_layernorm": false,
+ "model.layers.35.post_attention_layernorm": false,
+ "model.layers.36.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.self_attn.q_norm": false,
+ "model.layers.36.self_attn.k_norm": false,
+ "model.layers.36.self_attn.rope": false,
+ "model.layers.36.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.36.input_layernorm": false,
+ "model.layers.36.post_attention_layernorm": false,
+ "model.layers.37.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.self_attn.q_norm": false,
+ "model.layers.37.self_attn.k_norm": false,
+ "model.layers.37.self_attn.rope": false,
+ "model.layers.37.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.37.input_layernorm": false,
+ "model.layers.37.post_attention_layernorm": false,
+ "model.layers.38.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.self_attn.q_norm": false,
+ "model.layers.38.self_attn.k_norm": false,
+ "model.layers.38.self_attn.rope": false,
+ "model.layers.38.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.38.input_layernorm": false,
+ "model.layers.38.post_attention_layernorm": false,
+ "model.layers.39.self_attn.q_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.self_attn.k_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.self_attn.v_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.self_attn.o_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.self_attn.q_norm": false,
+ "model.layers.39.self_attn.k_norm": false,
+ "model.layers.39.self_attn.rope": false,
+ "model.layers.39.mlp.gate_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.mlp.down_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.mlp.up_proj": {
+ "group_size": 64,
+ "bits": 4
+ },
+ "model.layers.39.input_layernorm": false,
+ "model.layers.39.post_attention_layernorm": false,
+ "model.norm": false,
+ "lm_head": {
+ "group_size": 32,
+ "bits": 4
+ }
+ },
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
- "transformers_version": "4.51.0",
+ "transformers_version": "4.51.3",
+ "unsloth_fixed": true,
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
generation_config.json CHANGED
@@ -1,13 +1,14 @@
 {
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
  151645,
  151643
  ],
+ "max_length": 40960,
- "pad_token_id": 151643,
+ "pad_token_id": 151654,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95,
- "transformers_version": "4.51.0"
+ "transformers_version": "4.51.3"
 }
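These shipped sampling defaults (temperature 0.6, top-p 0.95, top-k 20) can be reproduced explicitly at generation time. A sketch using mlx-lm's sampler utilities, assuming a recent mlx-lm where `generate` accepts a `sampler`:

```python
from mlx_lm import load, generate
from mlx_lm.sample_utils import make_sampler

model, tokenizer = load("mlx-community/Qwen3-14B-4bit-AWQ")

# Mirror generation_config.json: temperature 0.6, top-p 0.95, top-k 20.
sampler = make_sampler(temp=0.6, top_p=0.95, top_k=20)

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Write a haiku about quantization."}],
    add_generation_prompt=True,
)
print(generate(model, tokenizer, prompt=prompt, sampler=sampler))
```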
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a34cf9222fabcd23f5818c54655db29c63bb15405e5ed566737df0ff9cd0c84c
+size 5352865510
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f0abd78717e47e007ad6c6dacaa763111f29a97fe04b6ac1189cb9856abe3d2
+size 3052272046
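The weight shards are stored as Git LFS pointers: the tracked file holds only the spec version, the sha256 object id, and the byte size, while the blob itself lives in LFS storage. A sketch for verifying a downloaded shard against its pointer (filename and digest taken from the first pointer above; adjust the path to your local copy):

```python
import hashlib

# Check a downloaded shard against the oid/size recorded in its LFS pointer.
path = "model-00001-of-00002.safetensors"
expected_sha256 = "a34cf9222fabcd23f5818c54655db29c63bb15405e5ed566737df0ff9cd0c84c"
expected_size = 5352865510

h = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size}"
assert h.hexdigest() == expected_sha256, "sha256 mismatch"
print("shard verified")
```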
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
  "single_word": false
  },
  "pad_token": {
- "content": "<|endoftext|>",
+ "content": "<|vision_pad|>",
  "lstrip": false,
  "normalized": false,
  "rstrip": false,
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654
tokenizer_config.json CHANGED
@@ -227,13 +227,13 @@
  "<|video_pad|>"
  ],
  "bos_token": null,
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": {},
- "model_max_length": 131072,
- "pad_token": "<|endoftext|>",
+ "model_max_length": 40960,
+ "pad_token": "<|vision_pad|>",
+ "padding_side": "left",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null