Text Generation · Transformers · Safetensors · PyTorch · nvidia · conversational
cpatonn committed (verified) · commit 8cdf1f6 · 1 parent: bca1b53

Upload folder using huggingface_hub

config.json CHANGED
@@ -68,7 +68,7 @@
     "quantization_status": "compressed",
     "sparsity_config": {},
     "transform_config": {},
-    "version": "0.10.3.dev47+ge463fe6"
+    "version": "0.10.3.dev71+g3ae8ca4.d20250913"
   },
   "rescale_prenorm_residual": true,
   "residual_in_fp32": false,
@@ -84,7 +84,7 @@
   "time_step_max": 0.1,
   "time_step_min": 0.001,
   "time_step_rank": 256,
-  "transformers_version": "4.56.0.dev0",
+  "transformers_version": "4.57.0.dev0",
   "use_bias": false,
   "use_cache": true,
   "use_conv_bias": true,
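
The config.json hunks only record tooling versions: the compressed-tensors build stamp inside the quantization block and the transformers version used to serialize the checkpoint. A minimal sketch for comparing those recorded versions against a local environment (field paths are assumed from the hunks above, and compressed-tensors is assumed to be installed under that distribution name):

import json
from importlib.metadata import version

with open("config.json") as f:
    cfg = json.load(f)

# "transformers_version" sits at the top level; the compressed-tensors build
# stamp is assumed to live under the "quantization_config" block shown above.
print("config transformers_version :", cfg.get("transformers_version"))
print("installed transformers      :", version("transformers"))
print("config compressed-tensors   :", cfg.get("quantization_config", {}).get("version"))
print("installed compressed-tensors:", version("compressed-tensors"))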
generation_config.json CHANGED
@@ -7,5 +7,5 @@
     12
   ],
   "pad_token_id": 0,
-  "transformers_version": "4.56.0.dev0"
+  "transformers_version": "4.57.0.dev0"
 }
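
Both JSON files move from the 4.56 dev line to 4.57.0.dev0, so loading this checkpoint is expected to want a recent transformers build plus compressed-tensors for the compressed weights. A minimal loading sketch, assuming a placeholder repo id and that the custom NemotronH code is pulled in via trust_remote_code:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "path/to/this-repo"  # placeholder: the id of this quantized checkpoint

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype="auto",       # keep the dtypes recorded in the compressed checkpoint
    device_map="auto",
    trust_remote_code=True,   # loads modeling_nemotron_h.py from the repo
)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=16)[0]))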
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cda8100f0c0e54b94a311baf95dcb87641c029c53f47cfb618a4419a8a9c4fea
+oid sha256:373d3b262f6787efa3eba167834f93ca3041c46319053962a8896d63adbcc76a
 size 4984644328
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:364d6c4cf28a860a3af4e0419a6830b67cfb29f87cdd2312597ef69a49c8499c
+oid sha256:fdb031720d413c9b73b6cd867991ecaa664057d73d6c0f5b2ab84b885ed6db7a
 size 3872549848
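
The two .safetensors entries are Git LFS pointer files, so only the sha256 object id changes while the byte size stays identical; the shards themselves were re-uploaded. A small sketch for checking a downloaded shard against the new oid (the file name and expected hash are taken from the diff above, everything else is standard hashlib):

import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    """Stream the file so multi-GB shards do not need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "373d3b262f6787efa3eba167834f93ca3041c46319053962a8896d63adbcc76a"
print(sha256_of("model-00001-of-00002.safetensors") == expected)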
modeling_nemotron_h.py CHANGED
@@ -1458,8 +1458,17 @@ class NemotronHModel(NemotronHPreTrainedModel):
         2. Attending to all inputs
         """
         mamba_mask = attention_mask
-        if cache_position[0] > 0 or (attention_mask is not None and torch.all(attention_mask == 1)):
-            mamba_mask = None
+
+        # Check if we're in tracing mode
+        if not torch.jit.is_tracing() and not torch.fx._symbolic_trace.is_fx_tracing():
+            try:
+                # Normal execution path
+                if cache_position[0] > 0 or (attention_mask is not None and torch.all(attention_mask == 1)):
+                    mamba_mask = None
+            except (TypeError, torch.fx.proxy.TraceError):
+                # Fallback for tracing mode
+                pass
+
         return mamba_mask


@@ -1635,4 +1644,4 @@ class NemotronHForCausalLM(NemotronHPreTrainedModel, GenerationMixin):
             cache_params=nemotron_h_outputs.cache_params,
             hidden_states=nemotron_h_outputs.hidden_states,
             attentions=nemotron_h_outputs.attentions,
-        )
+        )
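
The modeling change wraps the data-dependent mask shortcut in a tracing guard: under torch.jit or torch.fx tracing, cache_position[0] > 0 and torch.all(...) cannot be evaluated to concrete Python booleans, so the guard skips the shortcut and the attention mask flows through unchanged. A standalone sketch of the same pattern (is_fx_tracing is a private torch.fx helper, referenced here only because the committed code relies on it):

import torch
import torch.fx


def update_mamba_mask(mask, cache_position):
    # Only take the data-dependent shortcut in eager mode; during JIT/FX
    # tracing the comparisons below would need concrete values.
    if not torch.jit.is_tracing() and not torch.fx._symbolic_trace.is_fx_tracing():
        try:
            if cache_position[0] > 0 or (mask is not None and torch.all(mask == 1)):
                mask = None  # an all-ones mask carries no information
        except (TypeError, torch.fx.proxy.TraceError):
            pass  # proxy values: keep the mask as-is
    return mask


# Eager call: the all-ones mask is dropped.
print(update_mamba_mask(torch.ones(1, 4), torch.tensor([0])))  # -> None

# Under FX tracing the guard is skipped and the mask proxy flows through,
# so symbolic_trace succeeds instead of raising a TraceError.
def wrapper(mask):
    return update_mamba_mask(mask, torch.zeros(1, dtype=torch.long))

print(torch.fx.symbolic_trace(wrapper).graph)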
recipe.yaml CHANGED
@@ -1,5 +1,5 @@
-quant_stage:
-  quant_modifiers:
+default_stage:
+  default_modifiers:
     AWQModifier:
       config_groups:
         group_0:
@@ -19,14 +19,16 @@ quant_stage:
           output_activations: null
         format: null
       targets: [Linear]
-      ignore: [backbone.embeddings, 're:.*mixer.A_log', 're:.*conv1d.bias', 're:.*mixer.D',
-        're:.*mixer.dt_bias', 're:.*norm', 're:backbone.norm_f', lm_head]
+      ignore: [backbone.embeddings, 're:.*mixer.A_log', 're:.*conv1d.*', 're:.*mixer.D', 're:.*mixer.norm.*',
+        're:.*mixer.dt_bias', 're:backbone.norm_f', lm_head]
       mappings:
-      - smooth_layer: re:.*norm$
-        balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$', 're:.*up_proj$',
-          're:.*in_proj$']
-      - smooth_layer: re:.*v_proj$
-        balance_layers: ['re:.*o_proj$']
-      - smooth_layer: re:.*up_proj$
-        balance_layers: ['re:.*down_proj$']
+      - smooth_layer: re:backbone\.layers\.(7|16|25|34|43|52)\.norm$
+        balance_layers: ['re:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.q_proj$', 're:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.k_proj$',
+          're:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.v_proj$']
+      - smooth_layer: re:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.v_proj$
+        balance_layers: ['re:backbone\.layers\.(7|16|25|34|43|52)\.mixer\.o_proj$']
+      - smooth_layer: re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.norm$
+        balance_layers: ['re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.mixer\.up_proj$']
+      - smooth_layer: re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.mixer\.up_proj$
+        balance_layers: ['re:backbone\.layers\.(1|3|5|8|10|12|14|17|19|21|23|26|28|30|32|35|37|39|41|44|46|48|50|53|55|57|59|61)\.mixer\.down_proj$']
       duo_scaling: true
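
recipe.yaml is an llm-compressor AWQ recipe; the commit renames the stage to default_stage/default_modifiers and replaces the generic smooth/balance regexes with explicit backbone.layers indices (layers 7, 16, 25, ... for the q/k/v and o_proj pairs, layers 1, 3, 5, ... for up_proj/down_proj), while also ignoring all conv1d and mixer norm parameters. A hedged sketch of how such a recipe is typically applied with llm-compressor's one-shot flow; the source checkpoint, calibration dataset, and sample counts below are placeholders, not what was actually used for this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor import oneshot

base_model = "path/to/nemotron-h-base"  # placeholder: unquantized source checkpoint
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# One-shot AWQ calibration driven by the recipe shown above.
oneshot(
    model=model,
    recipe="recipe.yaml",
    dataset="open_platypus",        # placeholder calibration set
    max_seq_length=2048,
    num_calibration_samples=256,
)

# llm-compressor extends save_pretrained to emit compressed-tensors checkpoints.
model.save_pretrained("nemotron-h-awq", save_compressed=True)
tokenizer.save_pretrained("nemotron-h-awq")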