From 79c7f7df79abf9eb0d00b958a9fb3bef2a728578 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 18:55:19 +0000 Subject: [PATCH 01/12] WIP --- examples/offline_chat_with_tools.py | 132 ++++++++++++++++++ tests/models/test_mistral.py | 60 ++++++++ tests/models/test_pixtral.py | 16 +-- vllm/entrypoints/llm.py | 5 +- vllm/entrypoints/openai/serving_chat.py | 9 +- vllm/transformers_utils/tokenizers/mistral.py | 5 +- 6 files changed, 211 insertions(+), 16 deletions(-) create mode 100644 examples/offline_chat_with_tools.py diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py new file mode 100644 index 000000000000..432cc934ef3c --- /dev/null +++ b/examples/offline_chat_with_tools.py @@ -0,0 +1,132 @@ +import json +import random +import string + +from vllm import LLM +from vllm.sampling_params import SamplingParams + +# This script is an offline demo for running Pixtral. +# +# If you want to run a server/client setup, please follow this code: +# +# - Server: +# +# ```bash +# vllm serve mistralai/Mistral-7B-Instruct-v0.3 --tokenizer-mode mistral --load-format mistral --config-format mistral +# ``` +# +# - Client: +# +# ```bash +# curl --location 'http://:8000/v1/chat/completions' \ +# --header 'Content-Type: application/json' \ +# --header 'Authorization: Bearer token' \ +# --data '{ +# "model": "mistralai/Pixtral-12B-2409", +# "messages": [ +# { +# "role": "user", +# "content": [ +# {"type" : "text", "text": "Describe this image in detail please."}, +# {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}}, +# {"type" : "text", "text": "and this one as well. Answer in French."}, +# {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}} +# ] +# } +# ] +# }' +# ``` +# +# Usage: +# python demo.py simple +# python demo.py advanced + +model_name = "mistralai/Mistral-7B-Instruct-v0.3" +# or switch to "mistralai/Mistral-Nemo-Instruct-2407" +# or "mistralai/Mistral-Large-Instruct-2407" +# or any other mistral model with function calling ability + +sampling_params = SamplingParams(max_tokens=8192, temperature=0.0) +llm = LLM(model=model_name, tokenizer_mode="mistral", config_format="mistral", load_format="mistral") + +def generate_random_id(length=9): + characters = string.ascii_letters + string.digits + random_id = ''.join(random.choice(characters) for _ in range(length)) + return random_id + + +# simulate an API that can be called +def get_current_weather(city: str, state: str, unit: 'str'): + return (f"The weather in {city}, {state} is 85 degrees {unit}. It is " + "partly cloudly, with highs in the 90's.") + + +tool_funtions = { + "get_current_weather": get_current_weather + +} + +tools = [{ + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": + "string", + "description": + "The city to find the weather for, e.g. 'San Francisco'" + }, + "state": { + "type": + "string", + "description": + "the two-letter abbreviation for the state that the city is" + " in, e.g. 
'CA' which would mean 'California'" + }, + "unit": { + "type": "string", + "description": "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["city", "state", "unit"] + } + } +}] + +messages = [{ + "role": "user", + "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" +}] + +outputs = llm.chat(messages, sampling_params=sampling_params, tools=tools) +output = outputs[0].outputs[0].text.strip() + +# append the assistant message +messages.append({ + "role": "assistant", + "content": output, +}) + +# let's now actually parse and execute the model's output simulating an API call by using the +# above defined function +tool_calls = json.loads(output) +tool_answers = [tool_funtions[call['name']](**call['arguments']) for call in tool_calls] + +# append the answer as a tool message and let the LLM give you an answer +messages.append({ + "role": "tool", + "content": "\n\n".join(tool_answers), + "tool_call_id": generate_random_id(), +}) + +outputs = llm.chat(messages, sampling_params, tools=tools) + +print(outputs[0].outputs[0].text.strip()) +# yields +# 'The weather in Dallas, TX is 85 degrees fahrenheit. ' +# 'It is partly cloudly, with highs in the 90's.' diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 0741174497e3..4a79be10f458 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -3,14 +3,54 @@ Run `pytest tests/models/test_mistral.py`. """ import pytest +from vllm import SamplingParams from .utils import check_logprobs_close MODELS = [ "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", + "mistralai/Mistral-Nemo-Instruct-2407" ] +SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) +TOOLS = [{ + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": + "string", + "description": + "The city to find the weather for, e.g. 'San Francisco'" + }, + "state": { + "type": + "string", + "description": + "the two-letter abbreviation for the state that the city is" + " in, e.g. 'CA' which would mean 'California'" + }, + "unit": { + "type": "string", + "description": "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["city", "state", "unit"] + } + } +}] + +MSGS = [{ + "role": "user", + "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" 
+}] + @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -81,3 +121,23 @@ def test_mistral_format( name_0="hf", name_1="mistral", ) + + +@pytest.mark.parametrize("dtype", ["bfloat16"]) +@pytest.mark.parametrize("model", MODELS[1:]) # v1 can't do func calling +def test_mistral_function_calling( + vllm_runner, + model: str, + dtype: str, +) -> None: + with vllm_runner( + model, + dtype=dtype, + tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral" + ) as vllm_model: + outputs = vllm_model.model.chat(MSGS, tools=TOOLS, + sampling_params=SAMPLING_PARAMS) + + assert outputs[0].outputs[0].text.strip() == "" diff --git a/tests/models/test_pixtral.py b/tests/models/test_pixtral.py index 1fbfd77218ca..bc04505d96cf 100644 --- a/tests/models/test_pixtral.py +++ b/tests/models/test_pixtral.py @@ -112,10 +112,10 @@ def load_outputs_w_logprobs(filename: str) -> OutputsLogprobs: for tokens, text, logprobs in json_data] -@pytest.mark.skip( - reason= - "Model is too big, test passed on A100 locally but will OOM on CI machine." -) + # @pytest.mark.skip( + # reason= + # "Model is too big, test passed on A100 locally but will OOM on CI machine." + # ) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -148,10 +148,10 @@ def test_chat( name_1="output") -@pytest.mark.skip( - reason= - "Model is too big, test passed on A100 locally but will OOM on CI machine." -) + # @pytest.mark.skip( + # reason= + # "Model is too big, test passed on A100 locally but will OOM on CI machine." + # ) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) def test_model_engine(vllm_runner, model: str, dtype: str) -> None: diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index c01bffeb4289..8eaf65700cae 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import ClassVar, List, Optional, Sequence, Union, cast, overload +from typing import ClassVar, List, Optional, Sequence, Union, cast, overload, Any, Dict from tqdm import tqdm @@ -357,6 +357,7 @@ def chat( lora_request: Optional[LoRARequest] = None, chat_template: Optional[str] = None, add_generation_prompt: bool = True, + tools: Optional[List[Dict[str, Any]]] = None, ) -> List[RequestOutput]: """ Generate responses for a chat conversation. 
@@ -401,6 +402,7 @@ def chat( messages=messages, chat_template=chat_template, add_generation_prompt=add_generation_prompt, + tools=tools, ) else: prompt = apply_hf_chat_template( @@ -408,6 +410,7 @@ def chat( conversation=conversation, chat_template=chat_template, add_generation_prompt=add_generation_prompt, + tools=tools, ) inputs: PromptInputs diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 58e42fb5363f..12c6872a5ec3 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -123,7 +123,8 @@ async def create_chat_completion( ] prompt: Union[str, List[int]] - if isinstance(tokenizer, MistralTokenizer): + is_mistral_tokenizer = isinstance(tokenizer, MistralTokenizer) + if is_mistral_tokenizer: prompt = apply_mistral_chat_template( tokenizer, messages=request.messages, @@ -159,10 +160,10 @@ async def create_chat_completion( return self.create_error_response( "tool_choice = \"required\" is not supported!") - # "auto" tools requires --enable-auto-tool-choice - # and --tool-call-parser - if request.tool_choice == "auto" and not ( + if not is_mistral_tokenizer and request.tool_choice == "auto" and not ( self.enable_auto_tools and self.tool_parser is not None): + # for hf tokenizers, "auto" tools requires + # --enable-auto-tool-choice and --tool-call-parser return self.create_error_response( "\"auto\" tool choice requires " "--enable-auto-tool-choice and --tool-call-parser to be set") diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index ea1910ed20ec..98b122a6f6c5 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -165,10 +165,9 @@ def apply_chat_template(self, messages: List["ChatCompletionMessageParam"], tools: Optional[Dict[str, Any]] = None, **kwargs) -> List[int]: - assert tools is None, "`tools` are not yet supported." request = ChatCompletionRequest( - messages=messages) # type: ignore[type-var] + messages=messages, tools=tools) # type: ignore[type-var] encoded = self.mistral.encode_chat_completion(request) # encode-decode to get clean prompt @@ -176,7 +175,7 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join(tokens) + return "".join([t for t in tokens if t not in self.tokenizer._all_special_tokens]) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From 61f05726138fcd96c5ff0599d53657f8d48e0b84 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 18:56:31 +0000 Subject: [PATCH 02/12] WIP --- tests/models/test_pixtral.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/models/test_pixtral.py b/tests/models/test_pixtral.py index bc04505d96cf..1fbfd77218ca 100644 --- a/tests/models/test_pixtral.py +++ b/tests/models/test_pixtral.py @@ -112,10 +112,10 @@ def load_outputs_w_logprobs(filename: str) -> OutputsLogprobs: for tokens, text, logprobs in json_data] - # @pytest.mark.skip( - # reason= - # "Model is too big, test passed on A100 locally but will OOM on CI machine." - # ) +@pytest.mark.skip( + reason= + "Model is too big, test passed on A100 locally but will OOM on CI machine." 
+) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -148,10 +148,10 @@ def test_chat( name_1="output") - # @pytest.mark.skip( - # reason= - # "Model is too big, test passed on A100 locally but will OOM on CI machine." - # ) +@pytest.mark.skip( + reason= + "Model is too big, test passed on A100 locally but will OOM on CI machine." +) @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) def test_model_engine(vllm_runner, model: str, dtype: str) -> None: From cece5cf001a49f50e75d35e0fadf40a2f5052ca8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 18:59:05 +0000 Subject: [PATCH 03/12] WIP --- tests/models/test_mistral.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 4a79be10f458..63ffd2d7d251 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -14,6 +14,8 @@ ] SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) + +# for function calling TOOLS = [{ "type": "function", "function": { @@ -45,11 +47,11 @@ } } }] - MSGS = [{ "role": "user", "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" }] +EXPECTED_FUNC_CALL = '[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' @pytest.mark.parametrize("model", MODELS) @@ -140,4 +142,4 @@ def test_mistral_function_calling( outputs = vllm_model.model.chat(MSGS, tools=TOOLS, sampling_params=SAMPLING_PARAMS) - assert outputs[0].outputs[0].text.strip() == "" + assert outputs[0].outputs[0].text.strip() == EXPECTED_FUNC_CALL From 5a15813c19c411f7666155a3fe3e3ef293cc2265 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 21:01:08 +0200 Subject: [PATCH 04/12] format --- examples/offline_chat_with_tools.py | 27 +++++++++++-------- .../decoder_only/language/test_mistral.py | 1 + vllm/entrypoints/llm.py | 3 ++- vllm/entrypoints/openai/serving_chat.py | 2 +- vllm/transformers_utils/tokenizers/mistral.py | 9 ++++--- 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py index 432cc934ef3c..05928a1a34e0 100644 --- a/examples/offline_chat_with_tools.py +++ b/examples/offline_chat_with_tools.py @@ -41,13 +41,17 @@ # python demo.py simple # python demo.py advanced -model_name = "mistralai/Mistral-7B-Instruct-v0.3" +model_name = "mistralai/Mistral-7B-Instruct-v0.3" # or switch to "mistralai/Mistral-Nemo-Instruct-2407" # or "mistralai/Mistral-Large-Instruct-2407" # or any other mistral model with function calling ability sampling_params = SamplingParams(max_tokens=8192, temperature=0.0) -llm = LLM(model=model_name, tokenizer_mode="mistral", config_format="mistral", load_format="mistral") +llm = LLM(model=model_name, + tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral") + def generate_random_id(length=9): characters = string.ascii_letters + string.digits @@ -61,10 +65,7 @@ def get_current_weather(city: str, state: str, unit: 'str'): "partly cloudly, with highs in the 90's.") -tool_funtions = { - "get_current_weather": get_current_weather - -} +tool_funtions = {"get_current_weather": get_current_weather} tools = [{ "type": "function", @@ -99,8 +100,10 @@ def get_current_weather(city: str, state: str, unit: 'str'): }] messages = [{ - "role": "user", - "content": "Can 
you tell me what the temperate will be in Dallas, in fahrenheit?" + "role": + "user", + "content": + "Can you tell me what the temperate will be in Dallas, in fahrenheit?" }] outputs = llm.chat(messages, sampling_params=sampling_params, tools=tools) @@ -112,10 +115,12 @@ def get_current_weather(city: str, state: str, unit: 'str'): "content": output, }) -# let's now actually parse and execute the model's output simulating an API call by using the +# let's now actually parse and execute the model's output simulating an API call by using the # above defined function tool_calls = json.loads(output) -tool_answers = [tool_funtions[call['name']](**call['arguments']) for call in tool_calls] +tool_answers = [ + tool_funtions[call['name']](**call['arguments']) for call in tool_calls +] # append the answer as a tool message and let the LLM give you an answer messages.append({ @@ -127,6 +132,6 @@ def get_current_weather(city: str, state: str, unit: 'str'): outputs = llm.chat(messages, sampling_params, tools=tools) print(outputs[0].outputs[0].text.strip()) -# yields +# yields # 'The weather in Dallas, TX is 85 degrees fahrenheit. ' # 'It is partly cloudly, with highs in the 90's.' diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index ada67664fb9e..4d30adf51a71 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -3,6 +3,7 @@ Run `pytest tests/models/test_mistral.py`. """ import pytest + from vllm import SamplingParams from ...utils import check_logprobs_close diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 8eaf65700cae..79ea5f0a9a93 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1,5 +1,6 @@ from contextlib import contextmanager -from typing import ClassVar, List, Optional, Sequence, Union, cast, overload, Any, Dict +from typing import (Any, ClassVar, Dict, List, Optional, Sequence, Union, cast, + overload) from tqdm import tqdm diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 12c6872a5ec3..d28362a12abd 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -162,7 +162,7 @@ async def create_chat_completion( if not is_mistral_tokenizer and request.tool_choice == "auto" and not ( self.enable_auto_tools and self.tool_parser is not None): - # for hf tokenizers, "auto" tools requires + # for hf tokenizers, "auto" tools requires # --enable-auto-tool-choice and --tool-call-parser return self.create_error_response( "\"auto\" tool choice requires " diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 98b122a6f6c5..3349a4219bcb 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -166,8 +166,8 @@ def apply_chat_template(self, tools: Optional[Dict[str, Any]] = None, **kwargs) -> List[int]: - request = ChatCompletionRequest( - messages=messages, tools=tools) # type: ignore[type-var] + request = ChatCompletionRequest(messages=messages, + tools=tools) # type: ignore[type-var] encoded = self.mistral.encode_chat_completion(request) # encode-decode to get clean prompt @@ -175,7 +175,10 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join([t for t in tokens if t not in self.tokenizer._all_special_tokens]) + return "".join([ 
+ t for t in tokens + if t not in self.tokenizer._all_special_tokens + ]) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From 51ef13b25a97c16a42a2e128153b3223416699c8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 21:04:09 +0200 Subject: [PATCH 05/12] format --- examples/offline_chat_with_tools.py | 1 + tests/models/decoder_only/language/test_mistral.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py index 05928a1a34e0..339e8b5bbe97 100644 --- a/examples/offline_chat_with_tools.py +++ b/examples/offline_chat_with_tools.py @@ -1,3 +1,4 @@ +# ruff: noqa import json import random import string diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 4d30adf51a71..295868678ca3 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -50,9 +50,13 @@ }] MSGS = [{ "role": "user", - "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?" + "content": ("Can you tell me what the temperate" + " will be in Dallas, in fahrenheit?") }] -EXPECTED_FUNC_CALL = '[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' +EXPECTED_FUNC_CALL = ( + '[{"name": "get_current_weather", "arguments": ' + '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' +) @pytest.mark.parametrize("model", MODELS) From ddec95464a2efb26b68557eafbe5757c59244085 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 16 Sep 2024 21:11:21 +0200 Subject: [PATCH 06/12] format --- .../decoder_only/language/test_mistral.py | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 295868678ca3..4b2e1ec96232 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -9,8 +9,7 @@ from ...utils import check_logprobs_close MODELS = [ - "mistralai/Mistral-7B-Instruct-v0.1", - "mistralai/Mistral-7B-Instruct-v0.3", + "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mistral-Nemo-Instruct-2407" ] @@ -49,14 +48,14 @@ } }] MSGS = [{ - "role": "user", + "role": + "user", "content": ("Can you tell me what the temperate" - " will be in Dallas, in fahrenheit?") + " will be in Dallas, in fahrenheit?") }] EXPECTED_FUNC_CALL = ( '[{"name": "get_current_weather", "arguments": ' - '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]' -) + '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]') @pytest.mark.parametrize("model", MODELS) @@ -137,14 +136,13 @@ def test_mistral_function_calling( model: str, dtype: str, ) -> None: - with vllm_runner( - model, - dtype=dtype, - tokenizer_mode="mistral", - config_format="mistral", - load_format="mistral" - ) as vllm_model: - outputs = vllm_model.model.chat(MSGS, tools=TOOLS, + with vllm_runner(model, + dtype=dtype, + tokenizer_mode="mistral", + config_format="mistral", + load_format="mistral") as vllm_model: + outputs = vllm_model.model.chat(MSGS, + tools=TOOLS, sampling_params=SAMPLING_PARAMS) assert outputs[0].outputs[0].text.strip() == EXPECTED_FUNC_CALL From 1ec00bc99fa39349d20e2f4082de7e6bec2ab52c Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 17 Sep 2024 10:31:07 +0800 Subject: [PATCH 07/12] Apply 
suggestion --- vllm/transformers_utils/tokenizers/mistral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 3349a4219bcb..405bc1b452b6 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -175,10 +175,10 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join([ + return "".join( t for t in tokens if t not in self.tokenizer._all_special_tokens - ]) + ) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From 1d4de44b34a938fedf54b1ceaa5ac6203e7f4a54 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 17 Sep 2024 10:51:54 +0800 Subject: [PATCH 08/12] format --- vllm/transformers_utils/tokenizers/mistral.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 405bc1b452b6..7a228a3efa6e 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -175,10 +175,8 @@ def apply_chat_template(self, def convert_tokens_to_string(self, tokens: List[str]) -> str: if isinstance(self.tokenizer, Tekkenizer): - return "".join( - t for t in tokens - if t not in self.tokenizer._all_special_tokens - ) + return "".join(t for t in tokens + if t not in self.tokenizer._all_special_tokens) else: return self.tokenizer.decode(tokens) # type: ignore[arg-type] From aaf02a5a0a0184fd08160c570a6503c64b96e560 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 08:31:47 +0200 Subject: [PATCH 09/12] Trigger CI build From 0575475d2bcf2874b6f7f2a728286248a3353ee4 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 17:36:02 +0200 Subject: [PATCH 10/12] Up --- examples/offline_chat_with_tools.py | 4 ++-- tests/models/decoder_only/language/test_mistral.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/offline_chat_with_tools.py b/examples/offline_chat_with_tools.py index 339e8b5bbe97..e69a6c067e4d 100644 --- a/examples/offline_chat_with_tools.py +++ b/examples/offline_chat_with_tools.py @@ -6,7 +6,7 @@ from vllm import LLM from vllm.sampling_params import SamplingParams -# This script is an offline demo for running Pixtral. 
+# This script is an offline demo for function calling # # If you want to run a server/client setup, please follow this code: # @@ -23,7 +23,7 @@ # --header 'Content-Type: application/json' \ # --header 'Authorization: Bearer token' \ # --data '{ -# "model": "mistralai/Pixtral-12B-2409", +# "model": "mistralai/Mistral-7B-Instruct-v0.3" # "messages": [ # { # "role": "user", diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 4b2e1ec96232..328dcad5db6b 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -10,7 +10,8 @@ MODELS = [ "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", - "mistralai/Mistral-Nemo-Instruct-2407" + # Mistral-Nemo is to big for CI, but passes locally + # "mistralai/Mistral-Nemo-Instruct-2407" ] SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5) From 1054b12c852baab2fd117da2bc4b594a37ce81ec Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 17:36:28 +0200 Subject: [PATCH 11/12] fix tests --- tests/models/decoder_only/language/test_mistral.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 328dcad5db6b..26f90456849f 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -9,7 +9,8 @@ from ...utils import check_logprobs_close MODELS = [ - "mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", + "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mistral-7B-Instruct-v0.3", # Mistral-Nemo is to big for CI, but passes locally # "mistralai/Mistral-Nemo-Instruct-2407" ] From f1e72d1497e48ca02b2f33e2f31b8cc56d76d3f1 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 17 Sep 2024 17:44:34 +0200 Subject: [PATCH 12/12] Trigger CI build
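
Usage note: the server-side change in vllm/entrypoints/openai/serving_chat.py (patch 01) lets tool_choice="auto" pass through for Mistral tokenizers without --enable-auto-tool-choice or --tool-call-parser. The offline path is already covered by examples/offline_chat_with_tools.py; below is a minimal, illustrative client-side sketch of the server/client path. It assumes a server started with the `vllm serve mistralai/Mistral-7B-Instruct-v0.3 --tokenizer-mode mistral --load-format mistral --config-format mistral` command shown in the example header and uses the standard `openai` Python client; the base URL, API key, and the exact shape of the returned tool call are assumptions for illustration, not part of this patch series.

# Client-side sketch (illustrative; not part of the patch series).
# Assumes the OpenAI-compatible server from the example header is running locally;
# the base URL and API key below are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="token")

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description":
                    "The city to find the weather for, e.g. 'San Francisco'"
                },
                "state": {
                    "type": "string",
                    "description":
                    "the two-letter abbreviation for the state, e.g. 'CA'"
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"]
                }
            },
            "required": ["city", "state", "unit"]
        }
    }
}]

response = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    messages=[{
        "role": "user",
        "content":
        "Can you tell me what the temperature will be in Dallas, in fahrenheit?"
    }],
    tools=tools,
    tool_choice="auto",
)

# Depending on server-side tool parsing, the call may come back either as
# structured `tool_calls` or as raw text in `content`; inspect both.
message = response.choices[0].message
print(message.tool_calls or message.content)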