Upload folder using huggingface_hub
- config.json +2 -2
- generation_config.json +1 -1
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- modeling_nemotron_h.py +12 -3
- recipe.yaml +13 -11
config.json CHANGED
@@ -68,7 +68,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {},
     "transform_config": {},
-    "version": "0.10.3.
+    "version": "0.10.3.dev71+g3ae8ca4.d20250913"
   },
   "rescale_prenorm_residual": true,
   "residual_in_fp32": false,
@@ -84,7 +84,7 @@
   "time_step_max": 0.1,
   "time_step_min": 0.001,
   "time_step_rank": 256,
-  "transformers_version": "4.
+  "transformers_version": "4.57.0.dev0",
   "use_bias": false,
   "use_cache": true,
   "use_conv_bias": true,
generation_config.json CHANGED
@@ -7,5 +7,5 @@
     12
   ],
   "pad_token_id": 0,
-  "transformers_version": "4.
+  "transformers_version": "4.57.0.dev0"
 }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:373d3b262f6787efa3eba167834f93ca3041c46319053962a8896d63adbcc76a
 size 4984644328
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fdb031720d413c9b73b6cd867991ecaa664057d73d6c0f5b2ab84b885ed6db7a
 size 3872549848
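Both shard updates follow the git-LFS pointer format: a spec line, `oid sha256:<hash>`, and `size <bytes>`. A small standard-library sketch for checking a downloaded shard against the new pointer:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks so multi-GB shards don't fill RAM.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

expected = "373d3b262f6787efa3eba167834f93ca3041c46319053962a8896d63adbcc76a"
print(sha256_of("model-00001-of-00002.safetensors") == expected)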
modeling_nemotron_h.py CHANGED
@@ -1458,8 +1458,17 @@ class NemotronHModel(NemotronHPreTrainedModel):
         2. Attending to all inputs
         """
         mamba_mask = attention_mask
-
-
+
+        # Check if we're in tracing mode
+        if not torch.jit.is_tracing() and not torch.fx._symbolic_trace.is_fx_tracing():
+            try:
+                # Normal execution path
+                if cache_position[0] > 0 or (attention_mask is not None and torch.all(attention_mask == 1)):
+                    mamba_mask = None
+            except (TypeError, torch.fx.proxy.TraceError):
+                # Fallback for tracing mode
+                pass
+
         return mamba_mask


@@ -1635,4 +1644,4 @@ class NemotronHForCausalLM(NemotronHPreTrainedModel, GenerationMixin):
             cache_params=nemotron_h_outputs.cache_params,
             hidden_states=nemotron_h_outputs.hidden_states,
             attentions=nemotron_h_outputs.attentions,
-        )
+        )
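The new guard exists because `cache_position[0] > 0` and `torch.all(attention_mask == 1)` are data-dependent conditions: they evaluate fine in eager mode but raise (or bake in a wrong constant) under torch.jit and FX tracing, so under tracing the mask is conservatively kept. A standalone sketch of the pattern, with illustrative names rather than the repo's exact helper:

import torch

def update_mamba_mask(attention_mask, cache_position):
    # Eager mode: drop the mask once decoding is past the first step,
    # or when nothing is padded; tracing mode: always keep it.
    mamba_mask = attention_mask
    if not torch.jit.is_tracing():
        if cache_position[0] > 0 or (attention_mask is not None and torch.all(attention_mask == 1)):
            mamba_mask = None
    return mamba_mask

mask = torch.ones(1, 8, dtype=torch.long)
print(update_mamba_mask(mask, torch.arange(8)))          # None: all-ones mask
mask[0, :2] = 0                                          # left-padded batch
print(update_mamba_mask(mask, torch.arange(8)) is mask)  # True: mask kept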
recipe.yaml CHANGED
@@ -1,5 +1,5 @@
-
-
+default_stage:
+  default_modifiers:
     AWQModifier:
       config_groups:
         group_0:
@@ -19,14 +19,16 @@ quant_stage:
           output_activations: null
           format: null
       targets: [Linear]
-      ignore: [backbone.embeddings, 're:.*mixer.A_log', 're:.*conv1d
-        're:.*mixer.dt_bias', 're
+      ignore: [backbone.embeddings, 're:.*mixer.A_log', 're:.*conv1d.*', 're:.*mixer.D', 're:.*mixer.norm.*',
+        're:.*mixer.dt_bias', 're:backbone.norm_f', lm_head]
       mappings:
-      - smooth_layer: re
-        balance_layers: ['re
-          're
-      - smooth_layer: re
-        balance_layers: ['re
-      - smooth_layer: re
-        balance_layers: ['re
+      - smooth_layer: re:backbone\.layers\.(7|16|25|34|43|52)\.norm$
+        balance_layers: ['re:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.q_proj$', 're:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.k_proj$',
+          're:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.v_proj$']
+      - smooth_layer: re:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.v_proj$
+        balance_layers: ['re:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.o_proj$']
+      - smooth_layer: re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.norm$
+        balance_layers: ['re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.mixer\.up_proj$']
+      - smooth_layer: re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.mixer\.up_proj$
+        balance_layers: ['re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.mixer\.down_proj$']
       duo_scaling: true
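The reworked mappings pin AWQ smoothing to explicit layer indices: the (7|16|25|34|43|52) group covers the attention blocks (q/k/v balanced from the layer norm, o_proj from v_proj), while the long index list covers the MLP blocks (up_proj from the norm, down_proj from up_proj); Mamba-specific parameters (A_log, conv1d, D, dt_bias, norms) stay in `ignore`. To sanity-check which module names a mapping captures, a quick sketch, assuming the `re:` prefix is stripped before matching as the pattern style suggests; the candidate names are illustrative:

import re

pattern = r"backbone\.layers\.(7|16|25|34|43|52)\.mixer\.q_proj$"
candidates = [
    "backbone.layers.7.mixer.q_proj",   # attention layer -> matches
    "backbone.layers.8.mixer.up_proj",  # MLP layer -> no match
    "backbone.layers.52.mixer.q_proj",  # attention layer -> matches
]
for name in candidates:
    print(name, bool(re.search(pattern, name)))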