liushaowei
committed on
Commit
·
a060a3e
1
Parent(s):
22bbb72
fix tokenizer
Browse files
- chat_template.jinja +9 -9
- tokenizer_config.json +2 -2
chat_template.jinja
CHANGED
|
@@ -35,18 +35,18 @@
|
|
| 35 |
{%- endmacro -%}
|
| 36 |
|
| 37 |
|
| 38 |
-
{# Find last
|
| 39 |
-
{%- set ns = namespace(
|
| 40 |
{%- for idx in range(messages|length-1, -1, -1) -%}
|
| 41 |
-
{%- if messages[idx]['role'] == '
|
| 42 |
-
{%- set ns.
|
| 43 |
{%- break -%}
|
| 44 |
{%- endif -%}
|
| 45 |
{%- endfor -%}
|
| 46 |
|
| 47 |
-
{# split all messages into history & suffix
|
| 48 |
-
{%- set hist_msgs = messages[:ns.
|
| 49 |
-
{%- set suffix_msgs = messages[ns.
|
| 50 |
|
| 51 |
{%- if tools -%}
|
| 52 |
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
|
|
@@ -58,7 +58,7 @@
|
|
| 58 |
{%- endif -%}
|
| 59 |
{{set_roles(message)}}
|
| 60 |
{%- if message['role'] == 'assistant' -%}
|
| 61 |
-
|
| 62 |
{%- if message.get('tool_calls') -%}
|
| 63 |
{{render_toolcalls(message)}}
|
| 64 |
{%- endif -%}
|
|
@@ -76,7 +76,7 @@
|
|
| 76 |
{{set_roles(message)}}
|
| 77 |
{%- if message['role'] == 'assistant' -%}
|
| 78 |
{%- set rc = message.get('reasoning_content', '') -%}
|
| 79 |
-
|
| 80 |
{%- if message.get('tool_calls') -%}
|
| 81 |
{{render_toolcalls(message)}}
|
| 82 |
{%- endif -%}
|
|
|
|
| 35 |
{%- endmacro -%}
|
| 36 |
|
| 37 |
|
| 38 |
+
{# Find last non-tool-call assistant message #}
|
| 39 |
+
{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
|
| 40 |
{%- for idx in range(messages|length-1, -1, -1) -%}
|
| 41 |
+
{%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
|
| 42 |
+
{%- set ns.last_non_tool_call_assistant_msg = idx -%}
|
| 43 |
{%- break -%}
|
| 44 |
{%- endif -%}
|
| 45 |
{%- endfor -%}
|
| 46 |
|
| 47 |
+
{# split all messages into history & suffix, reasoning_content in suffix should be preserved. #}
|
| 48 |
+
{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
|
| 49 |
+
{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
|
| 50 |
|
| 51 |
{%- if tools -%}
|
| 52 |
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
|
|
|
|
| 58 |
{%- endif -%}
|
| 59 |
{{set_roles(message)}}
|
| 60 |
{%- if message['role'] == 'assistant' -%}
|
| 61 |
+
<think></think>{{render_content(message)}}
|
| 62 |
{%- if message.get('tool_calls') -%}
|
| 63 |
{{render_toolcalls(message)}}
|
| 64 |
{%- endif -%}
|
|
|
|
| 76 |
{{set_roles(message)}}
|
| 77 |
{%- if message['role'] == 'assistant' -%}
|
| 78 |
{%- set rc = message.get('reasoning_content', '') -%}
|
| 79 |
+
<think>{{rc}}</think>{{render_content(message)}}
|
| 80 |
{%- if message.get('tool_calls') -%}
|
| 81 |
{{render_toolcalls(message)}}
|
| 82 |
{%- endif -%}
|
tokenizer_config.json
CHANGED
|
@@ -121,7 +121,7 @@
|
|
| 121 |
"special": true
|
| 122 |
},
|
| 123 |
"163606": {
|
| 124 |
-
"content": "
|
| 125 |
"lstrip": false,
|
| 126 |
"normalized": false,
|
| 127 |
"rstrip": false,
|
|
@@ -129,7 +129,7 @@
|
|
| 129 |
"special": false
|
| 130 |
},
|
| 131 |
"163607": {
|
| 132 |
-
"content": "
|
| 133 |
"lstrip": false,
|
| 134 |
"normalized": false,
|
| 135 |
"rstrip": false,
|
|
|
|
| 121 |
"special": true
|
| 122 |
},
|
| 123 |
"163606": {
|
| 124 |
+
"content": "<think>",
|
| 125 |
"lstrip": false,
|
| 126 |
"normalized": false,
|
| 127 |
"rstrip": false,
|
|
|
|
| 129 |
"special": false
|
| 130 |
},
|
| 131 |
"163607": {
|
| 132 |
+
"content": "</think>",
|
| 133 |
"lstrip": false,
|
| 134 |
"normalized": false,
|
| 135 |
"rstrip": false,
|