Update config.json for upcoming Transformers support
config.json  CHANGED  (+12 -1)
@@ -31,6 +31,17 @@
   "vocab_size": 32000,
   "quantization_config": {
     "bits": 4,
+    "inside_layer_modules" : [
+      ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+      ["self_attn.o_proj"],
+      ["block_sparse_moe.experts.0.w1", "block_sparse_moe.experts.0.w2", "block_sparse_moe.experts.0.w3"],
+      ["block_sparse_moe.experts.1.w1", "block_sparse_moe.experts.1.w2", "block_sparse_moe.experts.1.w3"],
+      ["block_sparse_moe.experts.2.w1", "block_sparse_moe.experts.2.w2", "block_sparse_moe.experts.2.w3"],
+      ["block_sparse_moe.experts.3.w1", "block_sparse_moe.experts.3.w2", "block_sparse_moe.experts.3.w3"],
+      ["block_sparse_moe.experts.4.w1", "block_sparse_moe.experts.4.w2", "block_sparse_moe.experts.4.w3"],
+      ["block_sparse_moe.experts.5.w1", "block_sparse_moe.experts.5.w2", "block_sparse_moe.experts.5.w3"],
+      ["block_sparse_moe.experts.6.w1", "block_sparse_moe.experts.6.w2", "block_sparse_moe.experts.6.w3"],
+      ["block_sparse_moe.experts.7.w1", "block_sparse_moe.experts.7.w2", "block_sparse_moe.experts.7.w3"]],
     "group_size": 128,
     "damp_percent": 0.1,
     "desc_act": true,
@@ -40,4 +51,4 @@
     "model_file_base_name": "model",
     "quant_method": "gptq"
   }
-}
+}
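For context (not part of the commit itself): the added "inside_layer_modules" entry lists, for each decoder block, the linear submodules that GPTQ handles as quantization groups: the attention k/v/q projections, the attention output projection, and the w1/w2/w3 projections of each of the eight block_sparse_moe experts. Below is a minimal loading sketch, assuming a Transformers release that reads this GPTQ quantization_config together with the optimum and auto-gptq packages; the model id is a placeholder, not taken from this repo.

# Minimal sketch, assuming a Transformers version with GPTQ loading support
# and the optimum + auto-gptq packages installed. "your-org/your-gptq-model"
# is a placeholder id, not a real repo from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-gptq-model"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
# The quantization settings (bits, group_size, quant_method, ...) are read
# from the quantization_config block of config.json shown in the diff above.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))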