fix agent deployment (modelscope#1592)

montanafang · Aug 4, 2024 · ddf5bbe
1 parent 59fd942
commit ddf5bbe
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 30 deletions.
diff --git a/docs/source/LLM/Agent部署最佳实践.md b/docs/source/LLM/Agent部署最佳实践.md
@@ -247,7 +247,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
 
 调用结果
 ```json
-{"model":"llama3-8b-instruct","choices":[[{"index":0,"message":{"role":"assistant","content":"Question: What's the weather like in Boston today?\n\nThought: I need to get the current weather in Boston to answer this question.\n\nAction: get_current_weather\n\nAction Input: {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\nObservation:","tool_calls":{"id":"toolcall-f534d907ae254f2ab96e06c25179ddf9","function":{"arguments":" {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\n","name":"get_current_weather"},"type":"function"}},"finish_reason":"stop"}]],"usage":{"prompt_tokens":262,"completion_tokens":54,"total_tokens":316},"id":"chatcmpl-8630e8d675c941c0aca958a37633a3c9","object":"chat.completion","created":1717590756}
+{"model":"llama3-8b-instruct","choices":[[{"index":0,"message":{"role":"assistant","content":"Question: What's the weather like in Boston today?\n\nThought: I need to get the current weather in Boston to answer this question.\n\nAction: get_current_weather\n\nAction Input: {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\nObservation:","tool_calls":[{"id":"toolcall-f534d907ae254f2ab96e06c25179ddf9","function":{"arguments":" {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\n","name":"get_current_weather"},"type":"function"}]},"finish_reason":"stop"}]],"usage":{"prompt_tokens":262,"completion_tokens":54,"total_tokens":316},"id":"chatcmpl-8630e8d675c941c0aca958a37633a3c9","object":"chat.completion","created":1717590756}
 ```
 
 在返回结果的tool_calls中，可以获得调用的函数以及参数信息。
@@ -291,7 +291,7 @@ resp = client.chat.completions.create(
     tools = tools,
     messages=messages,
     seed=42)
-tool_calls = resp.choices[0].message.tool_calls
+tool_calls = resp.choices[0].message.tool_calls[0]
 print(f'query: {query}')
 print(f'tool_calls: {tool_calls}')
 
@@ -307,7 +307,7 @@ print(f'query: {query}')
 print('response: ', end='')
 for chunk in stream_resp:
     print(chunk.choices[0].delta.content, end='', flush=True)
-print()
+print(chunk.choices[0].delta.tool_calls[0])
 
 """
 query: What's the weather like in Boston today?

diff --git a/docs/source_en/LLM/Agent-deployment-best-practice.md b/docs/source_en/LLM/Agent-deployment-best-practice.md
@@ -248,7 +248,7 @@ You can also select a tool from the tools field by specifying the `tool_choice`
 
 result
 ```json
-{"model":"llama3-8b-instruct","choices":[[{"index":0,"message":{"role":"assistant","content":"Question: What's the weather like in Boston today?\n\nThought: I need to get the current weather in Boston to answer this question.\n\nAction: get_current_weather\n\nAction Input: {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\nObservation:","tool_calls":{"id":"toolcall-f534d907ae254f2ab96e06c25179ddf9","function":{"arguments":" {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\n","name":"get_current_weather"},"type":"function"}},"finish_reason":"stop"}]],"usage":{"prompt_tokens":262,"completion_tokens":54,"total_tokens":316},"id":"chatcmpl-8630e8d675c941c0aca958a37633a3c9","object":"chat.completion","created":1717590756}
+{"model":"llama3-8b-instruct","choices":[[{"index":0,"message":{"role":"assistant","content":"Question: What's the weather like in Boston today?\n\nThought: I need to get the current weather in Boston to answer this question.\n\nAction: get_current_weather\n\nAction Input: {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\nObservation:","tool_calls":[{"id":"toolcall-f534d907ae254f2ab96e06c25179ddf9","function":{"arguments":" {'location': 'Boston, MA', 'unit': 'fahrenheit'}\n\n","name":"get_current_weather"},"type":"function"}]},"finish_reason":"stop"}]],"usage":{"prompt_tokens":262,"completion_tokens":54,"total_tokens":316},"id":"chatcmpl-8630e8d675c941c0aca958a37633a3c9","object":"chat.completion","created":1717590756}
 ```
 
 You can also test with OpenAI SDK, for example
@@ -290,7 +290,7 @@ resp = client.chat.completions.create(
     tools = tools,
     messages=messages,
     seed=42)
-tool_calls = resp.choices[0].message.tool_calls
+tool_calls = resp.choices[0].message.tool_calls[0]
 print(f'query: {query}')
 print(f'tool_calls: {tool_calls}')
 
@@ -306,7 +306,7 @@ print(f'query: {query}')
 print('response: ', end='')
 for chunk in stream_resp:
     print(chunk.choices[0].delta.content, end='', flush=True)
-print()
+print(chunk.choices[0].delta.tool_calls[0])
 
 """
 query: What's the weather like in Boston today?

diff --git a/swift/llm/deploy.py b/swift/llm/deploy.py
@@ -259,10 +259,12 @@ async def _generate_full():
                 action, action_input = split_action_action_input(response)
                 toolcall = None
                 if action is not None:
-                    toolcall = ChatCompletionMessageToolCall(
-                        id=f'toolcall-{random_uuid()}',
-                        type='function',
-                        function=Function(name=action, arguments=action_input))
+                    toolcall = [
+                        ChatCompletionMessageToolCall(
+                            id=f'toolcall-{random_uuid()}',
+                            type='function',
+                            function=Function(name=action, arguments=action_input))
+                    ]
                 choice = ChatCompletionResponseChoice(
                     index=output.index,
                     message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
@@ -307,10 +309,12 @@ async def _generate_stream():
                     if output.finish_reason is not None:
                         action, action_input = split_action_action_input(total_res[output.index])
                         if action is not None:
-                            toolcall = ChatCompletionMessageToolCall(
-                                id=f'toolcall-{random_uuid()}',
-                                type='function',
-                                function=Function(name=action, arguments=action_input))
+                            toolcall = [
+                                ChatCompletionMessageToolCall(
+                                    id=f'toolcall-{random_uuid()}',
+                                    type='function',
+                                    function=Function(name=action, arguments=action_input))
+                            ]
                     choice = ChatCompletionResponseStreamChoice(
                         index=output.index,
                         delta=DeltaMessage(role='assistant', content=output.delta_text, tool_calls=toolcall),
@@ -396,10 +400,12 @@ async def _generate_full():
             action, action_input = split_action_action_input(response)
             toolcall = None
             if action is not None:
-                toolcall = ChatCompletionMessageToolCall(
-                    id=f'toolcall-{random_uuid()}',
-                    type='function',
-                    function=Function(name=action, arguments=action_input))
+                toolcall = [
+                    ChatCompletionMessageToolCall(
+                        id=f'toolcall-{random_uuid()}',
+                        type='function',
+                        function=Function(name=action, arguments=action_input))
+                ]
             choices = [
                 ChatCompletionResponseChoice(
                     index=0,
@@ -451,10 +457,12 @@ async def _generate_stream():
                     if finish_reason == 'stop':
                         action, action_input = split_action_action_input(total_response)
                         if action is not None:
-                            toolcall = ChatCompletionMessageToolCall(
-                                id=f'toolcall-{random_uuid()}',
-                                type='function',
-                                function=Function(name=action, arguments=action_input))
+                            toolcall = [
+                                ChatCompletionMessageToolCall(
+                                    id=f'toolcall-{random_uuid()}',
+                                    type='function',
+                                    function=Function(name=action, arguments=action_input))
+                            ]
                     choices = [
                         ChatCompletionResponseStreamChoice(
                             index=0,
@@ -575,10 +583,12 @@ async def _generate_full():
             action, action_input = split_action_action_input(response)
             toolcall = None
             if action is not None:
-                toolcall = ChatCompletionMessageToolCall(
-                    id=f'toolcall-{random_uuid()}',
-                    type='function',
-                    function=Function(name=action, arguments=action_input))
+                toolcall = [
+                    ChatCompletionMessageToolCall(
+                        id=f'toolcall-{random_uuid()}',
+                        type='function',
+                        function=Function(name=action, arguments=action_input))
+                ]
             choices = [
                 ChatCompletionResponseChoice(
                     index=0,
@@ -631,10 +641,12 @@ def _generate_stream():
                 if is_finished:
                     action, action_input = split_action_action_input(response)
                     if action:
-                        toolcall = ChatCompletionMessageToolCall(
-                            id=f'toolcall-{random_uuid()}',
-                            type='function',
-                            function=Function(name=action, arguments=action_input))
+                        toolcall = [
+                            ChatCompletionMessageToolCall(
+                                id=f'toolcall-{random_uuid()}',
+                                type='function',
+                                function=Function(name=action, arguments=action_input))
+                        ]
                 choices = [
                     ChatCompletionResponseStreamChoice(
                         index=0,