# /// script
# requires-python = ">=3.12"
# dependencies = ["transformers", "jinja2"]
# ///
from transformers import AutoTokenizer
def print_section(
    title: str, messages: list[dict], tokenizers: dict, **tokenizer_kwargs
) -> None:
    """Print one titled section showing how each tokenizer renders ``messages``.

    Writes a banner containing ``title``, the repr of ``messages``, and then,
    for every entry in ``tokenizers``, the entry's name followed by the value
    returned by its ``apply_chat_template`` method.

    Args:
        title: Heading printed between two 60-character ``=`` rules.
        messages: Conversation passed as-is to each tokenizer's
            ``apply_chat_template``.
        tokenizers: Mapping of display name to a tokenizer-like object that
            exposes ``apply_chat_template``; iterated in mapping order.
        **tokenizer_kwargs: Extra keyword arguments forwarded unchanged to
            ``apply_chat_template`` (the callers in this script pass
            ``add_generation_prompt`` and ``tools``).

    Returns:
        None. All output goes to standard output via ``print``.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(title)
    print(rule)
    # The ``=`` specifier echoes the expression text, so the names
    # ``messages`` and ``tokenizer_name`` are part of the printed output.
    print(f"\n{messages=}\n")
    for tokenizer_name, tokenizer in tokenizers.items():
        print(f"\n{tokenizer_name=}\n")
        # tokenize=False asks for the rendered prompt text instead of token ids.
        rendered = tokenizer.apply_chat_template(
            messages, tokenize=False, **tokenizer_kwargs
        )
        print(rendered)
# Load the two tokenizers whose chat templates are compared.  "." is resolved
# as a local path (the current working directory); the second source is a
# model id.  The dict keys are the labels printed above each rendered prompt.
tokenizers = {
    label: AutoTokenizer.from_pretrained(source)
    for label, source in (
        ("Local", "."),
        ("GLM-4.5-Air", "zai-org/GLM-4.5-Air"),
    )
}
# Keep the individual handles available under their original names.
local_tokenizer = tokenizers["Local"]
glm_tokenizer = tokenizers["GLM-4.5-Air"]
# Baseline prompts built around one user question.  Each case is
# (title, messages, extra keyword arguments); the extra keyword arguments are
# handed to print_section, which forwards them to apply_chat_template.
basic_cases = [
    # Only the user message.
    (
        "User message only",
        [{"role": "user", "content": "What is the capital of France?"}],
        {},
    ),
    # Same user message, additionally requesting the generation prompt.
    (
        "User message with generation prompt",
        [{"role": "user", "content": "What is the capital of France?"}],
        {"add_generation_prompt": True},
    ),
    # User message preceded by a custom system message.
    (
        "Custom system message",
        [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is the capital of France?"},
        ],
        {},
    ),
]
for case_title, case_messages, case_kwargs in basic_cases:
    print_section(case_title, case_messages, tokenizers, **case_kwargs)
# Single-turn conversations that end with an assistant reply, varying where
# the assistant's reasoning is supplied.  Each case is (title, messages).
#
# NOTE(review): two titles below promise a think section embedded in the
# content, yet those content strings hold no think-tag delimiters - only
# reasoning-like text, a newline, then the answer.  Confirm whether tag
# markup went missing from these literals.
single_turn_cases = [
    # Plain assistant answer, no reasoning anywhere.
    (
        "Single-turn with assistant response (no think)",
        [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "The capital of France is Paris."},
        ],
    ),
    # Reasoning-like text placed inside the content string itself.
    (
        "Single-turn with think embedded in content",
        [
            {"role": "user", "content": "What is the capital of France?"},
            {
                "role": "assistant",
                "content": "The user is asking about geography. France is a country in Europe, and its capital city is Paris. This is a straightforward factual question.\nThe capital of France is Paris.",
            },
        ],
    ),
    # Reasoning supplied through the separate reasoning_content key.
    (
        "Single-turn with reasoning_content field",
        [
            {"role": "user", "content": "What is the capital of France?"},
            {
                "role": "assistant",
                "content": "The capital of France is Paris.",
                "reasoning_content": "The user is asking about geography. France is a country in Europe, and its capital city is Paris.",
            },
        ],
    ),
    # Both at once: reasoning-like text in the content plus a
    # reasoning_content value whose wording says it should not be rendered.
    (
        "Single-turn with think section and reasoning_content field",
        [
            {"role": "user", "content": "What is the capital of France?"},
            {
                "role": "assistant",
                "content": "The user is asking about geography. France is a country in Europe, and its capital city is Paris. This is a straightforward factual question.\nThe capital of France is Paris.",
                "reasoning_content": "This should not be visible.",
            },
        ],
    ),
]
for case_title, case_messages in single_turn_cases:
    print_section(case_title, case_messages, tokenizers)
# Two-round conversations (France, then Germany) in which both assistant
# replies carry reasoning.  Each case is (title, messages).
#
# NOTE(review): the first title promises thinking embedded in the content,
# yet those content strings hold no think-tag delimiters - confirm whether
# tag markup went missing from these literals.
multi_turn_cases = [
    # Reasoning-like text written inside each assistant content string.
    (
        "Multi-turn with think embedded in content",
        [
            {"role": "user", "content": "What is the capital of France?"},
            {
                "role": "assistant",
                "content": "This is a basic geography question.\nThe capital of France is Paris.",
            },
            {"role": "user", "content": "What about Germany?"},
            {
                "role": "assistant",
                "content": "Another geography question. Germany's capital is Berlin.\nThe capital of Germany is Berlin.",
            },
        ],
    ),
    # Reasoning supplied through the reasoning_content key of each reply
    # (listed before the content key in these messages).
    (
        "Multi-turn with reasoning_content field",
        [
            {"role": "user", "content": "What is the capital of France?"},
            {
                "role": "assistant",
                "reasoning_content": "The user is asking about geography. France is a country in Europe, and its capital city is Paris.",
                "content": "The capital of France is Paris.",
            },
            {"role": "user", "content": "What about Germany?"},
            {
                "role": "assistant",
                "reasoning_content": "Another geography question. Germany's capital is Berlin.",
                "content": "The capital of Germany is Berlin.",
            },
        ],
    ),
]
for case_title, case_messages in multi_turn_cases:
    print_section(case_title, case_messages, tokenizers)
# Edge cases around the assistant's reasoning.  Each case is (title, messages).
#
# NOTE(review): the first two titles describe a think section, yet the
# assistant content strings hold no think-tag delimiters - confirm whether
# tag markup went missing from these literals.
edge_cases = [
    # Assistant content that consists solely of reasoning-like text.
    (
        "Assistant with only think section",
        [
            {
                "role": "user",
                "content": "Think about this problem but don't respond yet.",
            },
            {
                "role": "assistant",
                "content": "The user wants me to think about something but not provide a response yet. I should just show my thinking process without any visible output.",
            },
        ],
    ),
    # Same prompt, with the assistant text cut off mid-sentence.
    (
        "Assistant with unfinished think section",
        [
            {
                "role": "user",
                "content": "Think about this problem but don't respond yet.",
            },
            {
                "role": "assistant",
                "content": "The user wants me to think about something but not provide a response yet. I should just",
            },
        ],
    ),
    # reasoning_content key present but set to the empty string.
    (
        "Empty reasoning content",
        [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Say hello"},
            {
                "role": "assistant",
                "content": "Hello! How can I help you today?",
                "reasoning_content": "",
            },
        ],
    ),
]
for case_title, case_messages in edge_cases:
    print_section(case_title, case_messages, tokenizers)
# ============================================================================
# Tool use: one tool call, the tool's result, then the final answer
# ============================================================================
# Parameter schema of the only tool offered in this example.
get_weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and country",
        },
        "units": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location"],
}
# Tool definitions; passed to print_section as the ``tools`` keyword, which
# it forwards to each tokenizer's apply_chat_template.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather information for a location",
            "parameters": get_weather_parameters,
        },
    }
]
# The call the assistant issues in its first reply.
get_weather_call = {
    "name": "get_weather",
    "arguments": {"location": "Paris, France", "units": "celsius"},
}
# NOTE(review): the final assistant content reads as reasoning, a newline,
# then the answer, with no think-tag delimiters - confirm whether tag markup
# went missing from that literal.
tool_example = [
    {"role": "user", "content": "What's the weather like in Paris?"},
    {
        "role": "assistant",
        "content": "I'll check the weather in Paris for you.",
        "reasoning_content": "I should use the get_weather tool for this.",
        "tool_calls": [get_weather_call],
    },
    {
        "role": "tool",
        "content": "Current weather in Paris: 18°C, partly cloudy with light winds.",
    },
    {
        "role": "assistant",
        "content": "The weather API returned current conditions for Paris. I should provide this information to the user in a clear format.\nThe current weather in Paris is 18°C with partly cloudy skies and light winds. It's a pleasant day!",
    },
]
print_section(
    "Single-turn tool use with weather",
    messages=tool_example,
    tokenizers=tokenizers,
    tools=tools,
)
# ============================================================================
# Tool use: two tool calls issued in a single assistant reply
# ============================================================================
# Tool definitions; passed to print_section as the ``tools`` keyword, which
# it forwards to each tokenizer's apply_chat_template.
calculate_tool = {
    "type": "function",
    "function": {
        "name": "calculate",
        "description": "Perform mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate",
                }
            },
            "required": ["expression"],
        },
    },
}
get_current_time_tool = {
    "type": "function",
    "function": {
        "name": "get_current_time",
        "description": "Get the current date and time",
        "parameters": {"type": "object", "properties": {}},
    },
}
multi_tools = [calculate_tool, get_current_time_tool]
# The two calls the assistant issues; the two tool messages further down
# appear in the same order.
requested_calls = [
    {"name": "calculate", "arguments": {"expression": "15 * 23"}},
    {"name": "get_current_time", "arguments": {}},
]
# NOTE(review): the first assistant content reads as reasoning, a newline,
# then the visible reply, with no think-tag delimiters - confirm whether tag
# markup went missing from that literal.
multi_tool_example = [
    {
        "role": "user",
        "content": "I need to calculate 15 * 23 and also get the current time.",
    },
    {
        "role": "assistant",
        "content": "The user wants two things: a calculation and the current time. I'll use two tools to get this information.\nI'll help you with both the calculation and getting the current time.",
        "tool_calls": requested_calls,
    },
    {"role": "tool", "content": "345"},
    {"role": "tool", "content": "2024-01-15T14:30:22Z"},
    {
        "role": "assistant",
        "content": "Perfect! Here are your results:\n- 15 × 23 = 345\n- Current time: 2:30 PM UTC on January 15, 2024",
    },
]
print_section(
    "Single-turn with multiple tool calls",
    messages=multi_tool_example,
    tokenizers=tokenizers,
    tools=multi_tools,
)