liushaowei committed on
Commit
a060a3e
·
1 Parent(s): 22bbb72

fix tokenizer

Browse files
Files changed (2) hide show
  1. chat_template.jinja +9 -9
  2. tokenizer_config.json +2 -2
chat_template.jinja CHANGED
@@ -35,18 +35,18 @@
35
  {%- endmacro -%}
36
 
37
 
38
- {# Find last user msg #}
39
- {%- set ns = namespace(last_user_idx=messages|length) -%}
40
  {%- for idx in range(messages|length-1, -1, -1) -%}
41
- {%- if messages[idx]['role'] == 'user' -%}
42
- {%- set ns.last_user_idx = idx -%}
43
  {%- break -%}
44
  {%- endif -%}
45
  {%- endfor -%}
46
 
47
- {# split all messages into history & suffix #}
48
- {%- set hist_msgs = messages[:ns.last_user_idx] -%}
49
- {%- set suffix_msgs = messages[ns.last_user_idx:] -%}
50
 
51
  {%- if tools -%}
52
  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
@@ -58,7 +58,7 @@
58
  {%- endif -%}
59
  {{set_roles(message)}}
60
  {%- if message['role'] == 'assistant' -%}
61
- ◁think▷◁/think▷{{render_content(message)}}
62
  {%- if message.get('tool_calls') -%}
63
  {{render_toolcalls(message)}}
64
  {%- endif -%}
@@ -76,7 +76,7 @@
76
  {{set_roles(message)}}
77
  {%- if message['role'] == 'assistant' -%}
78
  {%- set rc = message.get('reasoning_content', '') -%}
79
- ◁think▷{{rc}}◁/think▷{{render_content(message)}}
80
  {%- if message.get('tool_calls') -%}
81
  {{render_toolcalls(message)}}
82
  {%- endif -%}
 
35
  {%- endmacro -%}
36
 
37
 
38
+ {# Find last non-tool-call assistant message #}
39
+ {%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
40
  {%- for idx in range(messages|length-1, -1, -1) -%}
41
+ {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
42
+ {%- set ns.last_non_tool_call_assistant_msg = idx -%}
43
  {%- break -%}
44
  {%- endif -%}
45
  {%- endfor -%}
46
 
47
+ {# Split all messages into history & suffix; reasoning_content in the suffix should be preserved. #}
48
+ {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
49
+ {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
50
 
51
  {%- if tools -%}
52
  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
 
58
  {%- endif -%}
59
  {{set_roles(message)}}
60
  {%- if message['role'] == 'assistant' -%}
61
+ <think></think>{{render_content(message)}}
62
  {%- if message.get('tool_calls') -%}
63
  {{render_toolcalls(message)}}
64
  {%- endif -%}
 
76
  {{set_roles(message)}}
77
  {%- if message['role'] == 'assistant' -%}
78
  {%- set rc = message.get('reasoning_content', '') -%}
79
+ <think>{{rc}}</think>{{render_content(message)}}
80
  {%- if message.get('tool_calls') -%}
81
  {{render_toolcalls(message)}}
82
  {%- endif -%}
tokenizer_config.json CHANGED
@@ -121,7 +121,7 @@
121
  "special": true
122
  },
123
  "163606": {
124
- "content": "◁think▷",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": false
130
  },
131
  "163607": {
132
- "content": "◁/think▷",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
 
121
  "special": true
122
  },
123
  "163606": {
124
+ "content": "<think>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
 
129
  "special": false
130
  },
131
  "163607": {
132
+ "content": "</think>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,