From 79c7f7df79abf9eb0d00b958a9fb3bef2a728578 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 18:55:19 +0000 Subject: [PATCH 01/12] WIP --- examples/offline_chat_with_tools.py | 132 ++++++++++++++++++ tests/models/test_mistral.py | 60 ++++++++ tests/models/test_pixtral.py | 16 +-- vllm/entrypoints/llm.py | 5 +- vllm/entrypoints/openai/serving_chat.py | 9 +- vllm/transformers_utils/tokenizers/mistral.py | 5 +- 6 files changed, 211 insertions(+), 16 deletions(-) create mode 100644 examples/offline_chat_with_tools.py diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py new file mode 100644 index 000000000000..432cc934ef3c --- /dev/null +++ b/examples/offline_chat_with_tools.py @@ -0,0 +1,132 @@ +import json +import random +import string + +from vllm import LLM +from vllm.sampling_params import SamplingParams + +# This script is an offline demo for running Pixtral. +# +# If you want to run a server/client setup, please follow this code: +# +# - Server: +# +# ```bash +# vllm serve mistralai/Mistral-7B-Instruct-v0.3 --tokenizer-mode mistral --load-format mistral --config-format mistral +# ``` +# +# - Client: +# +# ```bash +# curl --location 'http://:8000/v1/chat/completions' \ +# --header 'Content-Type: application/json' \ +# --header 'Authorization: Bearer token' \ +# --data '{ +# "model": "mistralai/Pixtral-12B-2409", +# "messages": [ +# { +# "role": "user", +# "content": [ +# {"type" : "text", "text": "Describe this image in detail please."}, +# {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}}, +# {"type" : "text", "text": "and this one as well. Answer in French."}, +# {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}} +# ] +# } +# ] +# }' +# ``` +# +# Usage: +# python demo.py simple +# python demo.py advanced + +model_name = "mistralai/Mistral-7B-Instruct-v0.3" +# or switch to "mistralai/Mistral-Nemo-Instruct-2407" +# or "mistralai/Mistral-Large-Instruct-2407" +# or any other mistral model with function calling ability + +sampling_params = SamplingParams(max_tokens=8192, temperature=0.0) +llm = LLM(model=model_name, tokenizer_mode="mistral", config_format="mistral", load_format="mistral") + +def generate_random_id(length=9): + characters = string.ascii_letters + string.digits + random_id = ''.join(random.choice(characters) for _ in range(length)) + return random_id + + +# simulate an API that can be called +def get_current_weather(city: str, state: str, unit: 'str'): + return (f"The weather in {city}, {state} is 85 degrees {unit}. It is " + "partly cloudly, with highs in the 90's.") + + +tool_funtions = { + "get_current_weather": get_current_weather + +} + +tools = [{ + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": + "string", + "description": + "The city to find the weather for, e.g. 'San Francisco'" + }, + "state": { + "type": + "string", + "description": + "the two-letter abbreviation for the state that the city is" + " in, e.g. 
'CA' which would mean 'California'" + }, + "unit": { + "type": "string", + "description": "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["city", "state", "unit"] + } + } +}] + +messages = [{ + "role": "user", + "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" +}] + +outputs = llm.chat(messages, sampling_params=sampling_params, tools=tools) +output = outputs[0].outputs[0].text.strip() + +# append the assistant message +messages.append({ + "role": "assistant", + "content": output, +}) + +# let's now actually parse and execute the model's output simulating an API call by using the +# above defined function +tool_calls = json.loads(output) +tool_answers = [tool_funtions[call['name']](**call['arguments']) for call in tool_calls] + +# append the answer as a tool message and let the LLM give you an answer +messages.append({ + "role": "tool", + "content": "\n\n".join(tool_answers), + "tool_call_id": generate_random_id(), +}) + +outputs = llm.chat(messages, sampling_params, tools=tools) + +print(outputs[0].outputs[0].text.strip()) +# yields +# 'The weather in Dallas, TX is 85 degrees fahrenheit. ' +# 'It is partly cloudly, with highs in the 90's.' diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 0741174497e3..4a79be10f458 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -3,14 +3,54 @@ Run `pytest tests/models/test_mistral.py`. """ import pytest +from vllm import SamplingParams from .utils import check_logprobs_close MODELS = [ "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", + "mistralai/Mistral-Nemo-Instruct-2407" ] +SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) +TOOLS = [{ + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": + "string", + "description": + "The city to find the weather for, e.g. 'San Francisco'" + }, + "state": { + "type": + "string", + "description": + "the two-letter abbreviation for the state that the city is" + " in, e.g. 'CA' which would mean 'California'" + }, + "unit": { + "type": "string", + "description": "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["city", "state", "unit"] + } + } +}] + +MSGS = [{ + "role": "user", + "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" 
+}] + @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -81,3 +121,23 @@ def test_mistral_format( name_0="hf", name_1="mistral", ) + + +@pytest.mark.parametrize("dtype", ["bfloat16"]) +@pytest.mark.parametrize("model", MODELS[1:]) # v1 can't do func calling +def test_mistral_function_calling( + vllm_runner, + model: str, + dtype: str, +) -> None: + with vllm_runner( + model, + dtype=dtype, + tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral" + ) as vllm_model: + outputs = vllm_model.model.chat(MSGS, tools=TOOLS, + sampling_params=SAMPLING_PARAMS) + + assert outputs[0].outputs[0].text.strip() == "" diff --git a/tests/models/test_pixtral.py b/tests/models/test_pixtral.py index 1fbfd77218ca..bc04505d96cf 100644 --- a/tests/models/test_pixtral.py +++ b/tests/models/test_pixtral.py @@ -112,10 +112,10 @@ def load_outputs_w_logprobs(filename: str) -> OutputsLogprobs: for tokens, text, logprobs in json_data] -@pytest.mark.skip( - reason= - "Model is too big, test passed on A100 locally but will OOM on CI machine." -) + # @pytest.mark.skip( + # reason= + # "Model is too big, test passed on A100 locally but will OOM on CI machine." + # ) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -148,10 +148,10 @@ def test_chat( name_1="output") -@pytest.mark.skip( - reason= - "Model is too big, test passed on A100 locally but will OOM on CI machine." -) + # @pytest.mark.skip( + # reason= + # "Model is too big, test passed on A100 locally but will OOM on CI machine." + # ) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) def test_model_engine(vllm_runner, model: str, dtype: str) -> None: diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index c01bffeb4289..8eaf65700cae 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import ClassVar, List, Optional, Sequence, Union, cast, overload +from typing import ClassVar, List, Optional, Sequence, Union, cast, overload, Any, Dict from tqdm import tqdm @@ -357,6 +357,7 @@ def chat( lora_request: Optional[LoRARequest] = None, chat_template: Optional[str] = None, add_generation_prompt: bool = True, + tools: Optional[List[Dict[str, Any]]] = None, ) -> List[RequestOutput]: """ Generate responses for a chat conversation. 
@@ -401,6 +402,7 @@ def chat( messages=messages, chat_template=chat_template, add_generation_prompt=add_generation_prompt, + tools=tools, ) else: prompt = apply_hf_chat_template( @@ -408,6 +410,7 @@ def chat( conversation=conversation, chat_template=chat_template, add_generation_prompt=add_generation_prompt, + tools=tools, ) inputs: PromptInputs diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 58e42fb5363f..12c6872a5ec3 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -123,7 +123,8 @@ async def create_chat_completion( ] prompt: Union[str, List[int]] - if isinstance(tokenizer, MistralTokenizer): + is_mistral_tokenizer = isinstance(tokenizer, MistralTokenizer) + if is_mistral_tokenizer: prompt = apply_mistral_chat_template( tokenizer, messages=request.messages, @@ -159,10 +160,10 @@ async def create_chat_completion( return self.create_error_response( "tool_choice = \"required\" is not supported!") - # "auto" tools requires --enable-auto-tool-choice - # and --tool-call-parser - if request.tool_choice == "auto" and not ( + if not is_mistral_tokenizer and request.tool_choice == "auto" and not ( self.enable_auto_tools and self.tool_parser is not None): + # for hf tokenizers, "auto" tools requires + # --enable-auto-tool-choice and --tool-call-parser return self.create_error_response( "\"auto\" tool choice requires " "--enable-auto-tool-choice and --tool-call-parser to be set") diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index ea1910ed20ec..98b122a6f6c5 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -165,10 +165,9 @@ def apply_chat_template(self, messages: List["ChatCompletionMessageParam"], tools: Optional[Dict[str, Any]] = None, **kwargs) -> List[int]: - assert tools is None, "`tools` are not yet supported." request = ChatCompletionRequest( - messages=messages) # type: ignore[type-var] + messages=messages, tools=tools) # type: ignore[type-var] encoded = self.mistral.encode_chat_completion(request) # encode-decode to get clean prompt @@ -176,7 +175,7 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join(tokens) + return "".join([t for t in tokens if t not in self.tokenizer._all_special_tokens]) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From 61f05726138fcd96c5ff0599d53657f8d48e0b84 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 18:56:31 +0000 Subject: [PATCH 02/12] WIP --- tests/models/test_pixtral.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/models/test_pixtral.py b/tests/models/test_pixtral.py index bc04505d96cf..1fbfd77218ca 100644 --- a/tests/models/test_pixtral.py +++ b/tests/models/test_pixtral.py @@ -112,10 +112,10 @@ def load_outputs_w_logprobs(filename: str) -> OutputsLogprobs: for tokens, text, logprobs in json_data] - # @pytest.mark.skip( - # reason= - # "Model is too big, test passed on A100 locally but will OOM on CI machine." - # ) +@pytest.mark.skip( + reason= + "Model is too big, test passed on A100 locally but will OOM on CI machine." 
+) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -148,10 +148,10 @@ def test_chat( name_1="output") - # @pytest.mark.skip( - # reason= - # "Model is too big, test passed on A100 locally but will OOM on CI machine." - # ) +@pytest.mark.skip( + reason= + "Model is too big, test passed on A100 locally but will OOM on CI machine." +) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) def test_model_engine(vllm_runner, model: str, dtype: str) -> None: From cece5cf001a49f50e75d35e0fadf40a2f5052ca8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 18:59:05 +0000 Subject: [PATCH 03/12] WIP --- tests/models/test_mistral.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 4a79be10f458..63ffd2d7d251 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -14,6 +14,8 @@ ] SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) + +# for function calling TOOLS = [{ "type": "function", "function": { @@ -45,11 +47,11 @@ } } }] - MSGS = [{ "role": "user", "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" }] +EXPECTED_FUNC_CALL = '[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' @pytest.mark.parametrize("model", MODELS) @@ -140,4 +142,4 @@ def test_mistral_function_calling( outputs = vllm_model.model.chat(MSGS, tools=TOOLS, sampling_params=SAMPLING_PARAMS) - assert outputs[0].outputs[0].text.strip() == "" + assert outputs[0].outputs[0].text.strip() == EXPECTED_FUNC_CALL From 5a15813c19c411f7666155a3fe3e3ef293cc2265 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 21:01:08 +0200 Subject: [PATCH 04/12] format --- examples/offline_chat_with_tools.py | 27 +++++++++++-------- .../decoder_only/language/test_mistral.py | 1 + vllm/entrypoints/llm.py | 3 ++- vllm/entrypoints/openai/serving_chat.py | 2 +- vllm/transformers_utils/tokenizers/mistral.py | 9 ++++--- 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py index 432cc934ef3c..05928a1a34e0 100644 --- a/examples/offline_chat_with_tools.py +++ b/examples/offline_chat_with_tools.py @@ -41,13 +41,17 @@ # python demo.py simple # python demo.py advanced -model_name = "mistralai/Mistral-7B-Instruct-v0.3" +model_name = "mistralai/Mistral-7B-Instruct-v0.3" # or switch to "mistralai/Mistral-Nemo-Instruct-2407" # or "mistralai/Mistral-Large-Instruct-2407" # or any other mistral model with function calling ability sampling_params = SamplingParams(max_tokens=8192, temperature=0.0) -llm = LLM(model=model_name, tokenizer_mode="mistral", config_format="mistral", load_format="mistral") +llm = LLM(model=model_name, + tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral") + def generate_random_id(length=9): characters = string.ascii_letters + string.digits @@ -61,10 +65,7 @@ def get_current_weather(city: str, state: str, unit: 'str'): "partly cloudly, with highs in the 90's.") -tool_funtions = { - "get_current_weather": get_current_weather - -} +tool_funtions = {"get_current_weather": get_current_weather} tools = [{ "type": "function", @@ -99,8 +100,10 @@ def get_current_weather(city: str, state: str, unit: 'str'): }] messages = [{ - "role": "user", - "content": "Can 
you tell me what the temperate will be in Dallas, in fahrenheit?" + "role": + "user", + "content": + "Can you tell me what the temperate will be in Dallas, in fahrenheit?" }] outputs = llm.chat(messages, sampling_params=sampling_params, tools=tools) @@ -112,10 +115,12 @@ def get_current_weather(city: str, state: str, unit: 'str'): "content": output, }) -# let's now actually parse and execute the model's output simulating an API call by using the +# let's now actually parse and execute the model's output simulating an API call by using the # above defined function tool_calls = json.loads(output) -tool_answers = [tool_funtions[call['name']](**call['arguments']) for call in tool_calls] +tool_answers = [ + tool_funtions[call['name']](**call['arguments']) for call in tool_calls +] # append the answer as a tool message and let the LLM give you an answer messages.append({ @@ -127,6 +132,6 @@ def get_current_weather(city: str, state: str, unit: 'str'): outputs = llm.chat(messages, sampling_params, tools=tools) print(outputs[0].outputs[0].text.strip()) -# yields +# yields # 'The weather in Dallas, TX is 85 degrees fahrenheit. ' # 'It is partly cloudly, with highs in the 90's.' diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index ada67664fb9e..4d30adf51a71 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -3,6 +3,7 @@ Run `pytest tests/models/test_mistral.py`. """ import pytest + from vllm import SamplingParams from ...utils import check_logprobs_close diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 8eaf65700cae..79ea5f0a9a93 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1,5 +1,6 @@ from contextlib import contextmanager -from typing import ClassVar, List, Optional, Sequence, Union, cast, overload, Any, Dict +from typing import (Any, ClassVar, Dict, List, Optional, Sequence, Union, cast, + overload) from tqdm import tqdm diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 12c6872a5ec3..d28362a12abd 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -162,7 +162,7 @@ async def create_chat_completion( if not is_mistral_tokenizer and request.tool_choice == "auto" and not ( self.enable_auto_tools and self.tool_parser is not None): - # for hf tokenizers, "auto" tools requires + # for hf tokenizers, "auto" tools requires # --enable-auto-tool-choice and --tool-call-parser return self.create_error_response( "\"auto\" tool choice requires " diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 98b122a6f6c5..3349a4219bcb 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -166,8 +166,8 @@ def apply_chat_template(self, tools: Optional[Dict[str, Any]] = None, **kwargs) -> List[int]: - request = ChatCompletionRequest( - messages=messages, tools=tools) # type: ignore[type-var] + request = ChatCompletionRequest(messages=messages, + tools=tools) # type: ignore[type-var] encoded = self.mistral.encode_chat_completion(request) # encode-decode to get clean prompt @@ -175,7 +175,10 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join([t for t in tokens if t not in self.tokenizer._all_special_tokens]) + return "".join([ 
+ t for t in tokens + if t not in self.tokenizer._all_special_tokens + ]) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From 51ef13b25a97c16a42a2e128153b3223416699c8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 21:04:09 +0200 Subject: [PATCH 05/12] format --- examples/offline_chat_with_tools.py | 1 + tests/models/decoder_only/language/test_mistral.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py index 05928a1a34e0..339e8b5bbe97 100644 --- a/examples/offline_chat_with_tools.py +++ b/examples/offline_chat_with_tools.py @@ -1,3 +1,4 @@ +# ruff: noqa import json import random import string diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 4d30adf51a71..295868678ca3 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -50,9 +50,13 @@ }] MSGS = [{ "role": "user", - "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" + "content": ("Can you tell me what the temperate" + " will be in Dallas, in fahrenheit?") }] -EXPECTED_FUNC_CALL = '[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' +EXPECTED_FUNC_CALL = ( + '[{"name": "get_current_weather", "arguments": ' + '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' +) @pytest.mark.parametrize("model", MODELS) From ddec95464a2efb26b68557eafbe5757c59244085 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 21:11:21 +0200 Subject: [PATCH 06/12] format --- .../decoder_only/language/test_mistral.py | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 295868678ca3..4b2e1ec96232 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -9,8 +9,7 @@ from ...utils import check_logprobs_close MODELS = [ - "mistralai/Mistral-7B-Instruct-v0.1", - "mistralai/Mistral-7B-Instruct-v0.3", + "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mistral-Nemo-Instruct-2407" ] @@ -49,14 +48,14 @@ } }] MSGS = [{ - "role": "user", + "role": + "user", "content": ("Can you tell me what the temperate" - " will be in Dallas, in fahrenheit?") + " will be in Dallas, in fahrenheit?") }] EXPECTED_FUNC_CALL = ( '[{"name": "get_current_weather", "arguments": ' - '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' -) + '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]') @pytest.mark.parametrize("model", MODELS) @@ -137,14 +136,13 @@ def test_mistral_function_calling( model: str, dtype: str, ) -> None: - with vllm_runner( - model, - dtype=dtype, - tokenizer_mode="mistral", - config_format="mistral", - load_format="mistral" - ) as vllm_model: - outputs = vllm_model.model.chat(MSGS, tools=TOOLS, + with vllm_runner(model, + dtype=dtype, + tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral") as vllm_model: + outputs = vllm_model.model.chat(MSGS, + tools=TOOLS, sampling_params=SAMPLING_PARAMS) assert outputs[0].outputs[0].text.strip() == EXPECTED_FUNC_CALL From 1ec00bc99fa39349d20e2f4082de7e6bec2ab52c Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 17 Sep 2024 10:31:07 +0800 Subject: [PATCH 07/12] Apply 
suggestion --- vllm/transformers_utils/tokenizers/mistral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 3349a4219bcb..405bc1b452b6 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -175,10 +175,10 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join([ + return "".join( t for t in tokens if t not in self.tokenizer._all_special_tokens - ]) + ) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From 1d4de44b34a938fedf54b1ceaa5ac6203e7f4a54 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 17 Sep 2024 10:51:54 +0800 Subject: [PATCH 08/12] format --- vllm/transformers_utils/tokenizers/mistral.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 405bc1b452b6..7a228a3efa6e 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -175,10 +175,8 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join( - t for t in tokens - if t not in self.tokenizer._all_special_tokens - ) + return "".join(t for t in tokens + if t not in self.tokenizer._all_special_tokens) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From aaf02a5a0a0184fd08160c570a6503c64b96e560 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 08:31:47 +0200 Subject: [PATCH 09/12] Trigger CI build From 0575475d2bcf2874b6f7f2a728286248a3353ee4 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 17:36:02 +0200 Subject: [PATCH 10/12] Up --- examples/offline_chat_with_tools.py | 4 ++-- tests/models/decoder_only/language/test_mistral.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py index 339e8b5bbe97..e69a6c067e4d 100644 --- a/examples/offline_chat_with_tools.py +++ b/examples/offline_chat_with_tools.py @@ -6,7 +6,7 @@ from vllm import LLM from vllm.sampling_params import SamplingParams -# This script is an offline demo for running Pixtral. 
+# This script is an offline demo for function calling # # If you want to run a server/client setup, please follow this code: # @@ -23,7 +23,7 @@ # --header 'Content-Type: application/json' \ # --header 'Authorization: Bearer token' \ # --data '{ -# "model": "mistralai/Pixtral-12B-2409", +# "model": "mistralai/Mistral-7B-Instruct-v0.3" # "messages": [ # { # "role": "user", diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 4b2e1ec96232..328dcad5db6b 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -10,7 +10,8 @@ MODELS = [ "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", - "mistralai/Mistral-Nemo-Instruct-2407" + # Mistral-Nemo is to big for CI, but passes locally + # "mistralai/Mistral-Nemo-Instruct-2407" ] SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) From 1054b12c852baab2fd117da2bc4b594a37ce81ec Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 17:36:28 +0200 Subject: [PATCH 11/12] fix tests --- tests/models/decoder_only/language/test_mistral.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 328dcad5db6b..26f90456849f 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -9,7 +9,8 @@ from ...utils import check_logprobs_close MODELS = [ - "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", + "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mistral-7B-Instruct-v0.3", # Mistral-Nemo is to big for CI, but passes locally # "mistralai/Mistral-Nemo-Instruct-2407" ] From f1e72d1497e48ca02b2f33e2f31b8cc56d76d3f1 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 17:44:34 +0200 Subject: [PATCH 12/12] Trigger CI build
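
Usage note: the server-side change in vllm/entrypoints/openai/serving_chat.py (patch 01) lets tool_choice="auto" pass through for Mistral tokenizers without --enable-auto-tool-choice or --tool-call-parser. The offline path is already covered by examples/offline_chat_with_tools.py; below is a minimal, illustrative client-side sketch of the server/client path. It assumes a server started with the `vllm serve mistralai/Mistral-7B-Instruct-v0.3 --tokenizer-mode mistral --load-format mistral --config-format mistral` command shown in the example header and uses the standard `openai` Python client; the base URL, API key, and the exact shape of the returned tool call are assumptions for illustration, not part of this patch series.

# Client-side sketch (illustrative; not part of the patch series).
# Assumes the OpenAI-compatible server from the example header is running locally;
# the base URL and API key below are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="token")

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description":
                    "The city to find the weather for, e.g. 'San Francisco'"
                },
                "state": {
                    "type": "string",
                    "description":
                    "the two-letter abbreviation for the state, e.g. 'CA'"
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"]
                }
            },
            "required": ["city", "state", "unit"]
        }
    }
}]

response = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    messages=[{
        "role": "user",
        "content":
        "Can you tell me what the temperature will be in Dallas, in fahrenheit?"
    }],
    tools=tools,
    tool_choice="auto",
)

# Depending on server-side tool parsing, the call may come back either as
# structured `tool_calls` or as raw text in `content`; inspect both.
message = response.choices[0].message
print(message.tool_calls or message.content)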