make tools as list in chat_template + update readme

#17
by Jofthomas - opened
Files changed (2) hide show
  1. README.md +49 -0
  2. tokenizer_config.json +1 -1
README.md CHANGED
@@ -69,6 +69,55 @@ sp_tokenizer = tokenizer_v3.instruct_tokenizer.tokenizer
69
  decoded = sp_tokenizer.decode(generated_ids[0])
70
  print(decoded)
71
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  # Instruct tokenizer
74
  The HuggingFace tokenizer included in this release should match our own. To compare:
 
69
  decoded = sp_tokenizer.decode(generated_ids[0])
70
  print(decoded)
71
  ```
72
+ Alternatively, you can run this example with the Hugging Face tokenizer.
73
+ To use this example, you'll need transformers version 4.39.0 or higher.
74
+ ```console
75
+ pip install "transformers>=4.39.0"
76
+ ```
77
+ ```python
78
+ from transformers import AutoModelForCausalLM, AutoTokenizer
79
+
80
+ model_id = "mistralai/Mixtral-8x22B-Instruct-v0.1"
81
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
82
+ conversation=[
83
+ {"role": "user", "content": "What's the weather like in Paris?"},
84
+ {
85
+ "role": "tool_calls",
86
+ "content": [
87
+ {
88
+ "name": "get_current_weather",
89
+ "arguments": {"location": "Paris, France", "format": "celsius"},
90
+
91
+ }
92
+ ]
93
+ },
94
+ {
95
+ "role": "tool_results",
96
+ "content": {"content": 22}
97
+ },
98
+ {"role": "assistant", "content": "The current temperature in Paris, France is 22 degrees Celsius."},
99
+ {"role": "user", "content": "What about San Francisco?"}
100
+ ]
101
+
102
+
103
+ tools = [{"type": "function", "function": {"name":"get_current_weather", "description": "Get the current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use. Infer this from the users location."}},"required":["location","format"]}}}]
104
+
105
+ # render the tool use prompt as a string:
106
+ tool_use_prompt = tokenizer.apply_chat_template(
107
+ conversation,
108
+ chat_template="tool_use",
109
+ tools=tools,
110
+ tokenize=False,
111
+ add_generation_prompt=True,
112
+
113
+ )
114
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
115
+
116
+ inputs = tokenizer(tool_use_prompt, return_tensors="pt")
117
+
118
+ outputs = model.generate(**inputs, max_new_tokens=20)
119
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
120
+ ```
121
 
122
  # Instruct tokenizer
123
  The HuggingFace tokenizer included in this release should match our own. To compare:
tokenizer_config.json CHANGED
@@ -36,7 +36,7 @@
36
  },
37
  {
38
  "name": "tool_use",
39
- "template": "{{bos_token}}{% set user_messages = messages | selectattr('role', 'equalto', 'user') | list %}{% for message in messages %}{% if message['role'] == 'user' %}{% if message == user_messages[-1] %}{{ '[AVAILABLE_TOOLS]'}}{% for tool in tools %}{{ tool }}{% endfor %}{{ '[/AVAILABLE_TOOLS]'}}{{ '[INST]' + message['content'] + '[/INST]' }}{% else %}{{ '[INST]' + message['content'] + '[/INST]' }}{% endif %}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% elif message['role'] == 'tool_results' %}{{'[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]'}}{% elif message['role'] == 'tool_calls' %}{{'[TOOL_CALLS]' + message['content']|string + eos_token}}{% endif %}{% endfor %}"
40
  }
41
  ],
42
  "clean_up_tokenization_spaces": false,
 
36
  },
37
  {
38
  "name": "tool_use",
39
+ "template": "{{bos_token}}{% set user_messages = messages | selectattr('role', 'equalto', 'user') | list %}{% for message in messages %}{% if message['role'] == 'user' %}{% if message == user_messages[-1] %}{% if tools %}{{'[AVAILABLE_TOOLS]'+ tools|string + '[/AVAILABLE_TOOLS]'}}{% endif %}{{ '[INST]' + message['content'] + '[/INST]' }}{% else %}{{ '[INST]' + message['content'] + '[/INST]' }}{% endif %}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% elif message['role'] == 'tool_results' %}{{'[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]'}}{% elif message['role'] == 'tool_calls' %}{{'[TOOL_CALLS]' + message['content']|string + eos_token}}{% endif %}{% endfor %}"
40
  }
41
  ],
42
  "clean_up_tokenization_spaces": false,