reissbaker committed on Sep 18

Commit

d3f3bd9

verified ·

1 Parent(s): d90e0fd

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
chat_template.jinja +103 -0
config.json +89 -0
generation_config.json +10 -0
model-00001-of-00072.safetensors +3 -0
model-00002-of-00072.safetensors +3 -0
model-00003-of-00072.safetensors +3 -0
model-00004-of-00072.safetensors +3 -0
model-00005-of-00072.safetensors +3 -0
model-00006-of-00072.safetensors +3 -0
model-00007-of-00072.safetensors +3 -0
model-00010-of-00072.safetensors +3 -0
model-00012-of-00072.safetensors +3 -0
model-00013-of-00072.safetensors +3 -0
model-00014-of-00072.safetensors +3 -0
model-00016-of-00072.safetensors +3 -0
model-00017-of-00072.safetensors +3 -0
model-00020-of-00072.safetensors +3 -0
model-00021-of-00072.safetensors +3 -0
model-00022-of-00072.safetensors +3 -0
model-00026-of-00072.safetensors +3 -0
model-00029-of-00072.safetensors +3 -0
model-00031-of-00072.safetensors +3 -0
model-00032-of-00072.safetensors +3 -0
model-00037-of-00072.safetensors +3 -0
model-00042-of-00072.safetensors +3 -0
model-00045-of-00072.safetensors +3 -0
model-00048-of-00072.safetensors +3 -0
model-00050-of-00072.safetensors +3 -0
model-00051-of-00072.safetensors +3 -0
model-00052-of-00072.safetensors +3 -0
model-00056-of-00072.safetensors +3 -0
model-00059-of-00072.safetensors +3 -0
model-00060-of-00072.safetensors +3 -0
model-00061-of-00072.safetensors +3 -0
model-00062-of-00072.safetensors +3 -0
model-00063-of-00072.safetensors +3 -0
model-00064-of-00072.safetensors +3 -0
model-00065-of-00072.safetensors +3 -0
model-00066-of-00072.safetensors +3 -0
model-00067-of-00072.safetensors +3 -0
model-00068-of-00072.safetensors +3 -0
model-00069-of-00072.safetensors +3 -0
model-00070-of-00072.safetensors +3 -0
model-00071-of-00072.safetensors +3 -0
model-00072-of-00072.safetensors +3 -0
model.safetensors.index.json +0 -0
recipe.yaml +6 -0
special_tokens_map.json +40 -0
tokenizer.json +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,103 @@

+[gMASK]<sop>
+{%- if tools -%}{#- Only when tools are supplied: emit a system turn that lists every tool as JSON inside <tools></tools> and spells out the XML tool-call format shown below. -#}
+<|system|>
+# Tools
+You may call one or more functions to assist with the user query.
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}
+<arg_key>{arg-key-1}</arg_key>
+<arg_value>{arg-value-1}</arg_value>
+<arg_key>{arg-key-2}</arg_key>
+<arg_value>{arg-value-2}</arg_value>
+...
+</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}{#- Render message content as plain text. A string is emitted unchanged. For a non-mapping iterable, the output is the concatenation of its string items and of item.text for mapping items whose type is 'text'. Any other value is printed as-is. -#}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}{#- items matching neither branch contribute nothing to the output -#}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}{#- A namespace object is used so the assignment made inside the loop is still visible after the loop ends. The initial -1 is kept when no user message exists. -#}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}{#- every user message overwrites the value, so the final value is the zero-based index of the last one -#}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>
+{{ visible_text(m.content) }}
+{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}{#- append /nothink only when enable_thinking is defined and false, and the user text does not already end with it -#}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}{#- a string reasoning_content field takes precedence; otherwise, if the text contains a closing think tag, the reasoning is split out of it and the remainder becomes the content -#}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+        {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+    {%- endif %}
+{%- endif %}
+{%- if loop.index0 > ns.last_user_index and reasoning_content -%}{#- reasoning text is kept only for assistant messages positioned after the last user message; every other assistant message gets an empty think block -#}
+{{ '\n<think>' + reasoning_content.strip() +  '</think>'}}
+{%- else -%}
+{{ '\n<think></think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ '\n' + content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}{#- when the call carries a nested function object, use it so tc.name and tc.arguments below resolve for both shapes -#}
+    {%- set tc = tc.function %}
+{%- endif %}
+{{ '\n<tool_call>' + tc.name }}
+{% set _args = tc.arguments %}
+{% for k, v in _args.items() %}
+<arg_key>{{ k }}</arg_key>
+<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
+{% endfor %}
+</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}{#- string content: one tool_response per message; the observation marker is emitted only when the previous message is not also a tool message -#}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{{- m.content }}
+{{- '\n</tool_response>' }}
+{%- else -%}{#- non-string content is iterated; each item contributes its output attribute when defined, otherwise the item itself -#}
+<|observation|>{% for tr in m.content %}
+<tool_response>
+{{ tr.output if tr.output is defined else tr }}
+</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>
+{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}{#- open a new assistant turn; when enable_thinking is defined and false, pre-fill an empty think block after the assistant marker -#}
+    <|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
+{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "architectures": [
+    "Glm4MoeForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "eos_token_id": [
+    151329,
+    151336,
+    151338
+  ],
+  "first_k_dense_replace": 3,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "max_position_embeddings": 131072,
+  "model_type": "glm4_moe",
+  "moe_intermediate_size": 1536,
+  "n_group": 1,
+  "n_routed_experts": 160,
+  "n_shared_experts": 1,
+  "norm_topk_prob": true,
+  "num_attention_heads": 96,
+  "num_experts_per_tok": 8,
+  "num_hidden_layers": 92,
+  "num_key_value_heads": 8,
+  "num_nextn_predict_layers": 1,
+  "pad_token_id": 151329,
+  "partial_rotary_factor": 0.5,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "format": "float-quantized",
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": null,
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "global_compression_ratio": null,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed",
+    "sparsity_config": {},
+    "transform_config": {},
+    "version": "0.11.0"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "routed_scaling_factor": 2.5,
+  "tie_word_embeddings": false,
+  "topk_group": 1,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.2",
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 151552
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "_from_model_config": true,
+  "eos_token_id": [
+    151329,
+    151336,
+    151338
+  ],
+  "pad_token_id": 151329,
+  "transformers_version": "4.55.2"
+}

model-00001-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d48ce060551ce29dc2535dd222e280c0157e756facf137c37a05d09fc2ddbbd2
+size 4993429864

model-00002-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e38eb905e9e823b4e86fa1302bb9dddd4315f8a7017400555463e1eaa7057c1
+size 4993855232

model-00003-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e739f8e595cd4ea69630a256957f7ca5da6a97a07b808402927e007aa55b29d5
+size 4998106784

model-00004-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c965084dc1983739fd41e22f0a94ac1359a02d5d3ecf1f772b92fb2414d32462
+size 4993854968

model-00005-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e4680bec9ff90a69dc2cc8f82c11328a4e97316373a31db67191fb45bf0ada4
+size 4993847864

model-00006-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7ba8300fec303d21cc43ac29772e9068cc7a3dbae968763715f099360a97214
+size 4993855304

model-00007-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2561af72be2a0d4e8e0c6fffc9a0837fcd967912b45220c2b182cba1f8148f3
+size 4998107648

model-00010-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d93a7c576d613f2bca0f14a61303289d758fa974fe369e6b994b6c02ae0c1f59
+size 4998108080

model-00012-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c831d8211251117e0ea2bfe77bb719df03b8399dbda468bb5acd2421b0de5058
+size 4993856224

model-00013-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35909aef1a6d2202c72b70e73b2e4ac38f7c0733084c347cd2d22f22088788d6
+size 4993849264

model-00014-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f1db08eb27ad8d857adbf03e18a516f2600f447c99cb21800229cf9bb3be839
+size 4998108024

model-00016-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd3e6c5b4cb70dd05e782ede459a70ebde6babe4943c8b8743f05da9dbf26d73
+size 4993856232

model-00017-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae99d65c69d315c46505c8e85e5e8393b530c8831d90a95bdd55a2e880d9aba3
+size 4993849336

model-00020-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc16b090c3bb6492234d0f2c2dd31fe5306f54eb51dee75e754c78a035c7c0bd
+size 4993856304

model-00021-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17fd987e550890c7b2980fd2a8c166d8d4965dda6159c5592f8308e68f71f0a6
+size 4993849408

model-00022-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:999379c13b2e54593fef5b5518221ed1652652cf30b876973dc9e63da1fc3f7c
+size 4998108040

model-00026-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ffe3260c6cdfa39c979ee25d9865bbc6e6ef087387d1bd1be2b19e32398da48
+size 4993849016

model-00029-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6035323c91be07dd7e03270625d567f4b1f604be548988f40ab8b44b109e79c7
+size 4998108016

model-00031-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c62a82f249fb1eb5c9d770f9f4ada5fcda54303ba30a3d65374d188b5628662a
+size 4993856256

model-00032-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ae235cc6070e9acc9e0b2b1d116f553ac9e15cedd26c5b5e0988756f9562df3
+size 4993856520

model-00037-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35a8e142677b4c4c2bf7aa97b788685ac7683c600ceb309d9073d2471f3baba3
+size 4996447680

model-00042-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd8a08e9630ae1fc7f04af5072e8f3d65f4bbe549d35da11541ee818253865ef
+size 4993849056

model-00045-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de051d38a34304e1523a9b6e16b6ffbcaa3ed884291182f87e232a9f62745c9d
+size 4993856224

model-00048-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27dea266e39e99d166005d8c34c9370eedf18763e5439ac76fb94bf1570e1246
+size 4998108024

model-00050-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3607d3dc6af36b9430731c04e5a7a1bb8c1af8b80955d0309e7ef52675d2e60c
+size 4993849168

model-00051-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5319a2598f729108c733b59a6f8874582951f044801f8f82a44fd0214215d520
+size 4990277320

model-00052-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c19e2c697e2f4567dc2d5e8623c231672b50da43ba6e99a0b84ebec272a6471b
+size 4993819280

model-00056-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bd0466c956a4dc8f95d6a991b0b210dafbcc3cafd650efac65ed705b10e6156
+size 4993856216

model-00059-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c09325a7d38067991a61bdd4e3a5558c3573fa244476916102333eec6878589e
+size 4998108024

model-00060-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73a55ab3af5aceb5a331850a0d702e4341f9899289b60c1dd65ea04c338eaae6
+size 4993856224

model-00061-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51d1c1418fb02eab712b1e21e98289ba1a7d84f01fc02a5a2225f1735d4af1a2
+size 4993849120

model-00062-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18e6c389cb7b0a8222f5fe08e2581f598f8a650e8993feaa151e90b6874b70ec
+size 4993856560

model-00063-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb43d9d6bb92c48c5afec722637b4e808ad847e3fa177f5b3ffc0b38cbbb8f2d
+size 4998108024

model-00064-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e2370a4d40b2728d0c80d55ef23b519715c53d0e0febce72aa25a8bcb34a59f
+size 4993856224

model-00065-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91addc592434e2a9f6b57c996414d093fde499c23d0702f490bcc12753d702d5
+size 4993849192

model-00066-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b4191713a2476ccfdc7068e95280216a0c0f02ce32351cb7f731d5cb2b73907
+size 4998108080

model-00067-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3b9a15fbb0a85c3fed9c8378425eabd7103337fc07b0ee7b00f8b0a29fe1bc9
+size 4993856168

model-00068-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eee33f70cec9f93cceb11b2d358d06484e21c8f83bae2e8a43785d4e19187ae4
+size 4993856224

model-00069-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddf041a1d9c578b806a0f65e2c35a5e96d819805d168a5b343a1a3c49275510e
+size 4993849264

model-00070-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afe91c7d3a69c250c6f12b0c0c6ff8d2630ec1e3ac7c579093be0b6f2c7d651e
+size 4998108024

model-00071-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:061f76ef82bb7fadcdf48b59819b2262d6d34305914fc1144cb7ec414cee2b8b
+size 3503839048

model-00072-of-00072.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebf648ab9c4c78b3573f69b6868e0e74476b856418cca0bfab91277346b63cd1
+size 1551892608

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

recipe.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+default_stage:
+  default_modifiers:
+    QuantizationModifier:
+      targets: [Linear]
+      ignore: [lm_head]
+      scheme: FP8_DYNAMIC

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "[MASK]",
+    "[gMASK]",
+    "[sMASK]",
+    "<sop>",
+    "<eop>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|observation|>",
+    "<|begin_of_image|>",
+    "<|end_of_image|>",
+    "<|begin_of_video|>",
+    "<|end_of_video|>",
+    "<|begin_of_audio|>",
+    "<|end_of_audio|>",
+    "<|begin_of_transcription|>",
+    "<|end_of_transcription|>",
+    "<|code_prefix|>",
+    "<|code_middle|>",
+    "<|code_suffix|>",
+    "/nothink"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bda8e2146c3bb7b7e0fc96dcc4f0aeff041c6c27952e3ace0665663ebff346ba
+size 19970700