forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3
This commit is contained in:
0
vllm-v0.6.2/tests/tool_use/__init__.py
Normal file
0
vllm-v0.6.2/tests/tool_use/__init__.py
Normal file
38
vllm-v0.6.2/tests/tool_use/conftest.py
Normal file
38
vllm-v0.6.2/tests/tool_use/conftest.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from .utils import ARGS, CONFIGS, ServerConfig
|
||||
|
||||
|
||||
# For each server config: pre-download the model, then hand the config to
# dependent fixtures.
@pytest.fixture(scope="session", params=CONFIGS.keys())
def server_config(request):
    config = CONFIGS[request.param]

    # Some models are marked as unsupported on ROCm; skip those there.
    if current_platform.is_rocm() and not config.get("supports_rocm", True):
        pytest.skip("The {} model can't be tested on the ROCm platform".format(
            config["model"]))

    # Pre-fetch model weights and tokenizer so that server startup in the
    # `server` fixture doesn't spend its wait budget on downloading.
    snapshot_download(config["model"])
    yield config
|
||||
|
||||
|
||||
# Launch one OpenAI-compatible server per server config.
@pytest.fixture(scope="session")
def server(request, server_config: ServerConfig):
    model_name = server_config["model"]
    extra_args = server_config["arguments"]
    with RemoteOpenAIServer(model_name, ARGS + extra_args,
                            max_wait_seconds=480) as remote_server:
        yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def client(server: RemoteOpenAIServer):
    """Async OpenAI client bound to the fixture-managed server."""
    async with server.get_async_client() as c:
        yield c
|
||||
@@ -0,0 +1,71 @@
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||
|
||||
|
||||
def test_chat_completion_request_with_no_tools():
    """`tools` absent, explicitly None, or an empty list must all normalize
    `tool_choice` to 'none'."""
    base_payload = {
        'messages': [{
            'role': 'user',
            'content': 'Hello'
        }],
        'model': 'facebook/opt-125m',
    }

    # tools key not present / None / empty list -- all three are equivalent
    for extra in ({}, {'tools': None}, {'tools': []}):
        request = ChatCompletionRequest.model_validate({
            **base_payload,
            **extra
        })
        assert request.tool_choice == 'none'
|
||||
|
||||
|
||||
def test_chat_completion_request_with_tool_choice_but_no_tools():
    """Supplying `tool_choice` without usable `tools` must be rejected."""
    base_payload = {
        'messages': [{
            'role': 'user',
            'content': 'Hello'
        }],
        'model': 'facebook/opt-125m',
        'tool_choice': 'auto',
    }

    # `tools` missing entirely, and `tools` explicitly None -- both invalid
    for payload in (base_payload, {**base_payload, 'tools': None}):
        with pytest.raises(
                ValueError,
                match="When using `tool_choice`, `tools` must be set."):
            ChatCompletionRequest.model_validate(payload)
|
||||
146
vllm-v0.6.2/tests/tool_use/test_chat_completions.py
Normal file
146
vllm-v0.6.2/tests/tool_use/test_chat_completions.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from typing import List
|
||||
|
||||
import openai
|
||||
import pytest
|
||||
|
||||
from .utils import (MESSAGES_WITHOUT_TOOLS, WEATHER_TOOL, ServerConfig,
|
||||
ensure_system_prompt)
|
||||
|
||||
|
||||
# test: make sure chat completions without tools provided work even when tools
# are enabled. This makes sure tool call chat templates work, AND that the tool
# parser stream processing doesn't change the output of the model.
@pytest.mark.asyncio
async def test_chat_completion_without_tools(client: openai.AsyncOpenAI,
                                             server_config: ServerConfig):
    models = await client.models.list()
    model_name: str = models.data[0].id
    chat_completion = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        logprobs=False)
    choice = chat_completion.choices[0]
    stop_reason = choice.finish_reason
    output_text = choice.message.content

    # the model should have produced plain text, not tool calls
    assert output_text is not None
    assert len(output_text) > 0
    assert stop_reason != "tool_calls"
    assert not choice.message.tool_calls

    # make the same request, streaming
    stream = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        logprobs=False,
        stream=True,
    )
    content_pieces: List[str] = []
    num_finish_reasons = 0
    role_sent: bool = False

    # assemble streamed chunks
    async for chunk in stream:
        delta = chunk.choices[0].delta

        # the role must be streamed exactly once, and be "assistant"
        if delta.role:
            assert not role_sent
            assert delta.role == 'assistant'
            role_sent = True

        if delta.content:
            content_pieces.append(delta.content)

        if chunk.choices[0].finish_reason is not None:
            num_finish_reasons += 1
            assert chunk.choices[0].finish_reason == choice.finish_reason

        # no tool call chunks should ever be streamed
        assert not delta.tool_calls or len(delta.tool_calls) == 0

    # the role was sent, exactly one finish reason arrived, content chunks
    # were produced, and the streamed text matches the non-streamed text
    assert role_sent
    assert num_finish_reasons == 1
    assert len(content_pieces)
    assert "".join(content_pieces) == output_text
|
||||
|
||||
|
||||
# test: conversation with tools enabled and provided that should not invoke
# tools, to make sure we can still get normal chat completion responses
# and that they won't be parsed as tools
@pytest.mark.asyncio
async def test_chat_completion_with_tools(client: openai.AsyncOpenAI,
                                          server_config: ServerConfig):
    models = await client.models.list()
    model_name: str = models.data[0].id
    chat_completion = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        tools=[WEATHER_TOOL],
        logprobs=False)
    choice = chat_completion.choices[0]
    stop_reason = chat_completion.choices[0].finish_reason
    output_text = chat_completion.choices[0].message.content

    # check to make sure we got text
    assert output_text is not None
    assert stop_reason != 'tool_calls'
    assert len(output_text) > 0

    # check to make sure no tool calls were returned
    assert (choice.message.tool_calls is None
            or len(choice.message.tool_calls) == 0)

    # make the same request, streaming
    stream = await client.chat.completions.create(
        messages=ensure_system_prompt(MESSAGES_WITHOUT_TOOLS, server_config),
        temperature=0,
        max_completion_tokens=150,
        model=model_name,
        logprobs=False,
        tools=[WEATHER_TOOL],
        stream=True,
    )

    chunks: List[str] = []
    finish_reason_count = 0
    role_sent: bool = False

    # assemble streamed chunks
    async for chunk in stream:
        delta = chunk.choices[0].delta

        # make sure the role is "assistant" and is only streamed once
        # (same guard as in test_chat_completion_without_tools)
        if delta.role:
            assert not role_sent
            assert delta.role == 'assistant'
            role_sent = True

        if delta.content:
            chunks.append(delta.content)

        if chunk.choices[0].finish_reason is not None:
            finish_reason_count += 1

        # make sure tool call chunks aren't being streamed
        assert not delta.tool_calls or len(delta.tool_calls) == 0

    # make sure the role was sent, only 1 finish reason was sent, that chunks
    # were in fact sent, and that the chunks match non-streaming
    assert role_sent
    assert finish_reason_count == 1
    assert chunk.choices[0].finish_reason == stop_reason
    assert chunk.choices[0].finish_reason != 'tool_calls'
    assert len(chunks)
    assert "".join(chunks) == output_text
|
||||
275
vllm-v0.6.2/tests/tool_use/test_jamba_tool_parser.py
Normal file
275
vllm-v0.6.2/tests/tool_use/test_jamba_tool_parser.py
Normal file
@@ -0,0 +1,275 @@
|
||||
import json
|
||||
from typing import Generator, List, Optional
|
||||
|
||||
import partial_json_parser
|
||||
import pytest
|
||||
from partial_json_parser.core.options import Allow
|
||||
|
||||
from vllm.entrypoints.openai.protocol import (DeltaMessage, FunctionCall,
|
||||
ToolCall)
|
||||
from vllm.entrypoints.openai.tool_parsers import JambaToolParser
|
||||
from vllm.transformers_utils.detokenizer import detokenize_incrementally
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
|
||||
|
||||
MODEL = "ai21labs/Jamba-tiny-dev"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def jamba_tokenizer():
    """Tokenizer for the Jamba tiny dev model, shared across the module."""
    tokenizer = get_tokenizer(tokenizer_name=MODEL)
    return tokenizer
|
||||
|
||||
|
||||
@pytest.fixture
def jamba_tool_parser(jamba_tokenizer):
    """Function-scoped parser, so each test starts with a fresh instance."""
    parser = JambaToolParser(jamba_tokenizer)
    return parser
|
||||
|
||||
|
||||
def assert_tool_calls(actual_tool_calls: List[ToolCall],
                      expected_tool_calls: List[ToolCall]):
    """Compare parsed tool calls against the expected ones.

    Call IDs are only sanity-checked (present, string, long enough) rather
    than compared, since they are not part of the expectation.
    """
    assert len(actual_tool_calls) == len(expected_tool_calls)

    for actual, expected in zip(actual_tool_calls, expected_tool_calls):
        assert isinstance(actual.id, str)
        assert len(actual.id) > 16

        assert actual.type == "function"
        assert actual.function == expected.function
|
||||
|
||||
|
||||
def stream_delta_message_generator(
        jamba_tool_parser: JambaToolParser, jamba_tokenizer: AnyTokenizer,
        model_output: str) -> Generator[DeltaMessage, None, None]:
    """Replay `model_output` through the streaming tool parser one token at a
    time, yielding every DeltaMessage the parser emits."""
    all_token_ids = jamba_tokenizer.encode(model_output,
                                           add_special_tokens=False)

    previous_text = ""
    previous_tokens = None
    prefix_offset = 0
    read_offset = 0
    for idx, token_id in enumerate(all_token_ids):
        delta_token_ids = [token_id]
        previous_token_ids = all_token_ids[:idx]
        current_token_ids = all_token_ids[:idx + 1]

        # incrementally detokenize the newest token
        (new_tokens, delta_text, new_prefix_offset,
         new_read_offset) = detokenize_incrementally(
             tokenizer=jamba_tokenizer,
             all_input_ids=current_token_ids,
             prev_tokens=previous_tokens,
             prefix_offset=prefix_offset,
             read_offset=read_offset,
             skip_special_tokens=False,
             spaces_between_special_tokens=True,
         )

        current_text = previous_text + delta_text

        delta_message = jamba_tool_parser.extract_tool_calls_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
            request=None,  # type: ignore[arg-type]
        )
        if delta_message:
            yield delta_message

        # roll the incremental-detokenization state forward
        previous_text = current_text
        if previous_tokens:
            previous_tokens = previous_tokens + new_tokens
        else:
            previous_tokens = new_tokens
        prefix_offset = new_prefix_offset
        read_offset = new_read_offset
|
||||
|
||||
|
||||
def test_extract_tool_calls_no_tools(jamba_tool_parser):
    """Plain text with no tool-call markers must pass through unchanged."""
    model_output = "This is a test"
    result = jamba_tool_parser.extract_tool_calls(
        model_output, request=None)  # type: ignore[arg-type]
    assert not result.tools_called
    assert result.tool_calls == []
    assert result.content == model_output
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    argnames=["model_output", "expected_tool_calls", "expected_content"],
    argvalues=[
        (
            ''' <tool_calls>[\n {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
            [
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Dallas",
                        "state": "TX",
                        "unit": "fahrenheit"
                    })))
            ],
            None),
        (
            ''' Sure! let me call the tool for you.<tool_calls>[\n {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
            [
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Dallas",
                        "state": "TX",
                        "unit": "fahrenheit"
                    })))
            ],
            " Sure! let me call the tool for you."),
        (
            ''' <tool_calls>[\n {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}},\n {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
            [
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Dallas",
                        "state": "TX",
                        "unit": "fahrenheit"
                    }))),
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Orlando",
                        "state": "FL",
                        "unit": "fahrenheit"
                    })))
            ],
            None)
    ],
    ids=[
        "single_tool",
        "single_tool_with_content",
        "parallel_tools",
    ],
)
def test_extract_tool_calls(jamba_tool_parser, model_output,
                            expected_tool_calls, expected_content):
    """Non-streaming extraction: tool calls and surrounding content."""
    result = jamba_tool_parser.extract_tool_calls(
        model_output, request=None)  # type: ignore[arg-type]
    assert result.tools_called

    assert_tool_calls(result.tool_calls, expected_tool_calls)

    assert result.content == expected_content
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    argnames=["model_output", "expected_tool_calls", "expected_content"],
    argvalues=[
        ('''This is a test''', [], '''This is a test'''),
        (
            ''' <tool_calls>[\n {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
            [
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Dallas",
                        "state": "TX",
                        "unit": "fahrenheit"
                    })))
            ],
            " "),
        (
            ''' Sure! let me call the tool for you.<tool_calls>[\n {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
            [
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Dallas",
                        "state": "TX",
                        "unit": "fahrenheit"
                    })))
            ],
            " Sure! let me call the tool for you."),
        (
            ''' <tool_calls>[\n {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}},\n {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
            [
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Dallas",
                        "state": "TX",
                        "unit": "fahrenheit"
                    }))),
                ToolCall(function=FunctionCall(
                    name="get_current_weather",
                    arguments=json.dumps({
                        "city": "Orlando",
                        "state": "FL",
                        "unit": "fahrenheit"
                    })))
            ],
            " ")
    ],
    ids=[
        "no_tools",
        "single_tool",
        "single_tool_with_content",
        "parallel_tools",
    ],
)
def test_extract_tool_calls_streaming(jamba_tool_parser, jamba_tokenizer,
                                      model_output, expected_tool_calls,
                                      expected_content):
    """Streaming extraction: reassemble deltas and compare to expectations."""
    non_tool_content: str = ''
    streamed_names: List[str] = []
    streamed_args_strs: List[str] = []
    current_tool_idx: int = -1
    streamed_ids: List[Optional[str]] = []

    for delta_message in stream_delta_message_generator(
            jamba_tool_parser, jamba_tokenizer, model_output):
        # the tool parser must never stream a role
        assert not delta_message.role

        if delta_message.content:
            non_tool_content += delta_message.content

        streamed_tool_calls = delta_message.tool_calls

        if streamed_tool_calls and len(streamed_tool_calls) > 0:
            # exactly one diff per delta - correct even for parallel calls
            assert len(streamed_tool_calls) == 1
            tool_call = streamed_tool_calls[0]

            # a new tool index starts a fresh accumulator
            if tool_call.index != current_tool_idx:
                current_tool_idx = tool_call.index
                streamed_args_strs.append("")
                streamed_ids.append(None)

            # record the first ID streamed for this tool index
            if tool_call.id and not streamed_ids[tool_call.index]:
                streamed_ids[tool_call.index] = tool_call.id

            # accumulate the streamed function pieces
            if tool_call.function:
                # the name should arrive IN ENTIRETY, exactly one time
                if tool_call.function.name:
                    assert isinstance(tool_call.function.name, str)
                    streamed_names.append(tool_call.function.name)

                if tool_call.function.arguments:
                    # arguments arrive as string fragments
                    assert isinstance(tool_call.function.arguments, str)
                    streamed_args_strs[
                        tool_call.index] += tool_call.function.arguments

    assert non_tool_content == expected_content

    actual_tool_calls = [
        ToolCall(id=call_id,
                 function=FunctionCall(
                     name=name,
                     arguments=partial_json_parser.ensure_json(
                         args_str, Allow.OBJ | Allow.STR)))
        for call_id, name, args_str in zip(streamed_ids, streamed_names,
                                           streamed_args_strs)
    ]
    assert_tool_calls(actual_tool_calls, expected_tool_calls)
|
||||
205
vllm-v0.6.2/tests/tool_use/test_parallel_tool_calls.py
Normal file
205
vllm-v0.6.2/tests/tool_use/test_parallel_tool_calls.py
Normal file
@@ -0,0 +1,205 @@
|
||||
import json
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import openai
|
||||
import pytest
|
||||
|
||||
from .utils import (MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
|
||||
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE, SEARCH_TOOL,
|
||||
WEATHER_TOOL, ServerConfig)
|
||||
|
||||
|
||||
# test: getting the model to generate parallel tool calls (streaming/not)
# when requested. NOTE that not all models may support this, so some exclusions
# may be added in the future. e.g. llama 3.1 models are not designed to support
# parallel tool calls.
@pytest.mark.asyncio
async def test_parallel_tool_calls(client: openai.AsyncOpenAI,
                                   server_config: ServerConfig):

    if not server_config.get("supports_parallel", True):
        pytest.skip("The {} model doesn't support parallel tool calls".format(
            server_config["model"]))

    models = await client.models.list()
    model_name: str = models.data[0].id
    chat_completion = await client.chat.completions.create(
        messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
        temperature=0,
        max_completion_tokens=200,
        model=model_name,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False)

    choice = chat_completion.choices[0]
    stop_reason = choice.finish_reason
    non_streamed_tool_calls = choice.message.tool_calls

    # exactly two tool calls should be present
    assert choice.message.role == "assistant"
    assert non_streamed_tool_calls is not None
    assert len(non_streamed_tool_calls) == 2

    for tool_call in non_streamed_tool_calls:
        # each tool call carries a function and a plausible ID
        assert tool_call.type == "function"
        assert tool_call.function is not None
        assert isinstance(tool_call.id, str)
        assert len(tool_call.id) >= 9

        # the weather tool should have been called with valid arguments
        assert tool_call.function.name == WEATHER_TOOL["function"]["name"]
        assert isinstance(tool_call.function.arguments, str)

        parsed_arguments = json.loads(tool_call.function.arguments)
        assert isinstance(parsed_arguments, Dict)
        assert isinstance(parsed_arguments.get("city"), str)
        assert isinstance(parsed_arguments.get("state"), str)

    assert stop_reason == "tool_calls"

    # make the same request, streaming
    stream = await client.chat.completions.create(
        model=model_name,
        messages=MESSAGES_ASKING_FOR_PARALLEL_TOOLS,
        temperature=0,
        max_completion_tokens=200,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False,
        stream=True)

    role_name: Optional[str] = None
    finish_reason_count: int = 0

    streamed_names: List[str] = []
    streamed_args: List[str] = []
    current_tool_idx: int = -1
    num_ids_seen: int = 0

    async for chunk in stream:

        # a finish reason, if present, must be 'tool_calls'
        if chunk.choices[0].finish_reason:
            finish_reason_count += 1
            assert chunk.choices[0].finish_reason == 'tool_calls'

        # the role may only ever be streamed as 'assistant'
        if chunk.choices[0].delta.role:
            assert not role_name or role_name == 'assistant'
            role_name = 'assistant'

        streamed_tool_calls = chunk.choices[0].delta.tool_calls

        if streamed_tool_calls and len(streamed_tool_calls) > 0:

            # exactly one diff per chunk - correct even for parallel calls
            assert len(streamed_tool_calls) == 1
            tool_call = streamed_tool_calls[0]

            # a new tool index starts a fresh argument accumulator
            if tool_call.index != current_tool_idx:
                current_tool_idx = tool_call.index
                streamed_args.append("")

            # every streamed ID must be a plausible string
            if tool_call.id:
                num_ids_seen += 1
                assert (isinstance(tool_call.id, str)
                        and (len(tool_call.id) >= 9))

            # accumulate streamed function pieces
            if tool_call.function:
                # the name should arrive IN ENTIRETY, exactly one time
                if tool_call.function.name:
                    assert isinstance(tool_call.function.name, str)
                    streamed_names.append(tool_call.function.name)

                if tool_call.function.arguments:
                    # arguments arrive as string fragments
                    assert isinstance(tool_call.function.arguments, str)
                    streamed_args[
                        tool_call.index] += tool_call.function.arguments

    assert finish_reason_count == 1
    assert role_name == 'assistant'

    assert (len(non_streamed_tool_calls) == len(streamed_names) ==
            len(streamed_args))

    # streamed and non-streamed results must agree call-by-call
    for i in range(2):
        assert non_streamed_tool_calls[i].function.name == streamed_names[i]
        streamed_parsed = json.loads(streamed_args[i])
        non_streamed_parsed = json.loads(
            non_streamed_tool_calls[i].function.arguments)
        assert streamed_parsed == non_streamed_parsed
|
||||
|
||||
|
||||
# test: providing parallel tool calls back to the model to get a response
# (streaming/not)
@pytest.mark.asyncio
async def test_parallel_tool_calls_with_results(client: openai.AsyncOpenAI,
                                                server_config: ServerConfig):

    if not server_config.get("supports_parallel", True):
        pytest.skip("The {} model doesn't support parallel tool calls".format(
            server_config["model"]))

    models = await client.models.list()
    model_name: str = models.data[0].id
    chat_completion = await client.chat.completions.create(
        messages=MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
        temperature=0,
        max_completion_tokens=200,
        model=model_name,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False)

    choice = chat_completion.choices[0]

    assert choice.finish_reason != "tool_calls"  # "stop" or "length"
    assert choice.message.role == "assistant"
    assert not choice.message.tool_calls
    assert choice.message.content is not None
    assert "98" in choice.message.content  # Dallas temp in tool response
    assert "78" in choice.message.content  # Orlando temp in tool response

    # make the same request, streaming
    stream = await client.chat.completions.create(
        messages=MESSAGES_WITH_PARALLEL_TOOL_RESPONSE,
        temperature=0,
        max_completion_tokens=200,
        model=model_name,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False,
        stream=True)

    content_pieces: List[str] = []
    num_finish_reasons = 0
    role_sent: bool = False

    async for chunk in stream:
        delta = chunk.choices[0].delta

        # the role must be streamed exactly once, and be "assistant"
        if delta.role:
            assert not role_sent
            assert delta.role == "assistant"
            role_sent = True

        if delta.content:
            content_pieces.append(delta.content)

        if chunk.choices[0].finish_reason is not None:
            num_finish_reasons += 1
            assert chunk.choices[0].finish_reason == choice.finish_reason

        # no tool call chunks should be streamed for this conversation
        assert not delta.tool_calls or len(delta.tool_calls) == 0

    assert role_sent
    assert num_finish_reasons == 1
    assert len(content_pieces)
    assert "".join(content_pieces) == choice.message.content
|
||||
192
vllm-v0.6.2/tests/tool_use/test_tool_calls.py
Normal file
192
vllm-v0.6.2/tests/tool_use/test_tool_calls.py
Normal file
@@ -0,0 +1,192 @@
|
||||
import json
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import openai
|
||||
import pytest
|
||||
|
||||
from .utils import (MESSAGES_ASKING_FOR_TOOLS, MESSAGES_WITH_TOOL_RESPONSE,
|
||||
SEARCH_TOOL, WEATHER_TOOL)
|
||||
|
||||
|
||||
# test: request a chat completion that should return tool calls, so we know they
# are parsable
@pytest.mark.asyncio
async def test_tool_call_and_choice(client: openai.AsyncOpenAI):
    models = await client.models.list()
    model_name: str = models.data[0].id
    chat_completion = await client.chat.completions.create(
        messages=MESSAGES_ASKING_FOR_TOOLS,
        temperature=0,
        max_completion_tokens=100,
        model=model_name,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False)

    choice = chat_completion.choices[0]
    stop_reason = choice.finish_reason
    tool_calls = choice.message.tool_calls

    # exactly one well-formed tool call should be present
    assert choice.message.role == 'assistant'
    assert tool_calls is not None
    assert len(tool_calls) == 1
    assert tool_calls[0].type == 'function'
    assert tool_calls[0].function is not None
    assert isinstance(tool_calls[0].id, str)
    assert len(tool_calls[0].id) >= 9

    # the weather tool (classic example) should have been called with arguments
    assert tool_calls[0].function.name == WEATHER_TOOL["function"]["name"]
    assert tool_calls[0].function.arguments is not None
    assert isinstance(tool_calls[0].function.arguments, str)

    # the arguments must parse as the expected JSON payload
    parsed_arguments = json.loads(tool_calls[0].function.arguments)
    assert isinstance(parsed_arguments, Dict)
    assert isinstance(parsed_arguments.get("city"), str)
    assert isinstance(parsed_arguments.get("state"), str)
    assert parsed_arguments.get("city") == "Dallas"
    assert parsed_arguments.get("state") == "TX"

    assert stop_reason == "tool_calls"

    function_name: Optional[str] = None
    function_args_str: str = ''
    tool_call_id: Optional[str] = None
    role_name: Optional[str] = None
    finish_reason_count: int = 0

    # make the same request, streaming
    stream = await client.chat.completions.create(
        model=model_name,
        messages=MESSAGES_ASKING_FOR_TOOLS,
        temperature=0,
        max_completion_tokens=100,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False,
        stream=True)

    async for chunk in stream:
        assert chunk.choices[0].index == 0

        # a finish reason, if present, must be 'tool_calls'
        if chunk.choices[0].finish_reason:
            finish_reason_count += 1
            assert chunk.choices[0].finish_reason == 'tool_calls'

        # the role may only ever be streamed as 'assistant'
        if chunk.choices[0].delta.role:
            assert not role_name or role_name == 'assistant'
            role_name = 'assistant'

        streamed_tool_calls = chunk.choices[0].delta.tool_calls

        if streamed_tool_calls and len(streamed_tool_calls) > 0:
            # exactly one diff per chunk, per the request parameters
            assert len(streamed_tool_calls) == 1
            tool_call = streamed_tool_calls[0]

            # the ID must be streamed at most once
            if tool_call.id:
                assert not tool_call_id
                tool_call_id = tool_call.id

            # accumulate streamed function pieces
            if tool_call.function:
                # the name should arrive IN ENTIRETY, exactly one time
                if tool_call.function.name:
                    assert function_name is None
                    assert isinstance(tool_call.function.name, str)
                    function_name = tool_call.function.name
                if tool_call.function.arguments:
                    assert isinstance(tool_call.function.arguments, str)
                    function_args_str += tool_call.function.arguments

    assert finish_reason_count == 1
    assert role_name == 'assistant'
    assert isinstance(tool_call_id, str) and (len(tool_call_id) >= 9)

    # validate the name and arguments
    assert function_name == WEATHER_TOOL["function"]["name"]
    assert function_name == tool_calls[0].function.name
    assert isinstance(function_args_str, str)

    # validate arguments
    streamed_args = json.loads(function_args_str)
    assert isinstance(streamed_args, Dict)
    assert isinstance(streamed_args.get("city"), str)
    assert isinstance(streamed_args.get("state"), str)
    assert streamed_args.get("city") == "Dallas"
    assert streamed_args.get("state") == "TX"

    # make sure everything matches non-streaming except for ID
    assert function_name == tool_calls[0].function.name
    assert choice.message.role == role_name
    assert choice.message.tool_calls[0].function.name == function_name

    # compare streamed with non-streamed args Dict-wise, not string-wise
    # because character-to-character comparison might not work e.g. the tool
    # call parser adding extra spaces or something like that. we care about the
    # dicts matching not byte-wise match
    assert parsed_arguments == streamed_args
|
||||
|
||||
|
||||
# test: providing tools and results back to model to get a non-tool response
# (streaming/not)
@pytest.mark.asyncio
async def test_tool_call_with_results(client: openai.AsyncOpenAI):
    models = await client.models.list()
    model_name: str = models.data[0].id
    chat_completion = await client.chat.completions.create(
        messages=MESSAGES_WITH_TOOL_RESPONSE,
        temperature=0,
        max_completion_tokens=100,
        model=model_name,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False)

    choice = chat_completion.choices[0]

    assert choice.finish_reason != "tool_calls"  # "stop" or "length"
    assert choice.message.role == "assistant"
    assert not choice.message.tool_calls
    assert choice.message.content is not None
    assert "98" in choice.message.content  # the temperature from the response

    # make the same request, streaming
    stream = await client.chat.completions.create(
        messages=MESSAGES_WITH_TOOL_RESPONSE,
        temperature=0,
        max_completion_tokens=100,
        model=model_name,
        tools=[WEATHER_TOOL, SEARCH_TOOL],
        logprobs=False,
        stream=True)

    content_pieces: List[str] = []
    num_finish_reasons = 0
    role_sent: bool = False

    async for chunk in stream:
        delta = chunk.choices[0].delta

        # the role must be streamed exactly once, and be "assistant"
        if delta.role:
            assert not role_sent
            assert delta.role == "assistant"
            role_sent = True

        if delta.content:
            content_pieces.append(delta.content)

        if chunk.choices[0].finish_reason is not None:
            num_finish_reasons += 1
            assert chunk.choices[0].finish_reason == choice.finish_reason

        # no tool call chunks should be streamed for this conversation
        assert not delta.tool_calls or len(delta.tool_calls) == 0

    assert role_sent
    assert num_finish_reasons == 1
    assert len(content_pieces)
    assert "".join(content_pieces) == choice.message.content
|
||||
305
vllm-v0.6.2/tests/tool_use/utils.py
Normal file
305
vllm-v0.6.2/tests/tool_use/utils.py
Normal file
@@ -0,0 +1,305 @@
|
||||
from copy import deepcopy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from openai.types.chat import (ChatCompletionMessageParam,
|
||||
ChatCompletionToolParam)
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from tests.utils import VLLM_PATH
|
||||
|
||||
|
||||
class ServerConfig(TypedDict, total=False):
    """Per-model configuration for a tool-use test server.

    ``total=False`` makes every key optional; tests read the capability
    flags with ``.get`` and assume permissive defaults when absent.
    """
    # HF model id to download and serve
    model: str
    # extra CLI arguments appended to the universal ARGS for this model
    arguments: List[str]
    # optional system prompt patched into conversations for this model
    system_prompt: Optional[str]
    # whether the model supports parallel tool calls
    supports_parallel: Optional[bool]
    # whether the model can be tested on the ROCm platform
    supports_rocm: Optional[bool]
|
||||
|
||||
|
||||
def patch_system_prompt(messages: List[Dict[str, Any]],
                        system_prompt: str) -> List[Dict[str, Any]]:
    """Return a copy of *messages* whose first entry is a system message
    with ``system_prompt`` as its content.

    If the conversation already starts with a system message its content
    is replaced; otherwise a new system message is prepended. The input
    list is never mutated (a deep copy is returned).

    Fix: guard the first-element access so an empty conversation gets a
    system message prepended instead of raising IndexError.
    """
    new_messages = deepcopy(messages)
    if new_messages and new_messages[0]["role"] == "system":
        new_messages[0]["content"] = system_prompt
    else:
        new_messages.insert(0, {"role": "system", "content": system_prompt})
    return new_messages
|
||||
|
||||
|
||||
def ensure_system_prompt(messages: List[Dict[str, Any]],
                         config: ServerConfig) -> List[Dict[str, Any]]:
    """Apply the config's system prompt to *messages*, when one is set.

    Models whose config has no (or an empty) ``system_prompt`` get the
    conversation back unchanged.
    """
    system_prompt = config.get("system_prompt")
    if not system_prompt:
        return messages
    return patch_system_prompt(messages, system_prompt)
|
||||
|
||||
|
||||
# universal args for all models go here. also good if you need to test locally
# and change type or KV cache quantization or something.
# per-model arguments from CONFIGS are appended after these.
ARGS: List[str] = ["--enable-auto-tool-choice", "--max-model-len", "1024"]
|
||||
|
||||
# One entry per model under test; the dict key doubles as the pytest param id.
# Each value supplies the model id, extra server CLI arguments, and optional
# capability flags / system prompt (see ServerConfig).
CONFIGS: Dict[str, ServerConfig] = {
    "hermes": {
        "model":
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "arguments": [
            "--tool-call-parser", "hermes", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_hermes.jinja")
        ],
        # steer the model to call tools only when actually relevant
        "system_prompt":
        "You are a helpful assistant with access to tools. If a tool"
        " that you have would be helpful to answer a user query, "
        "call the tool. Otherwise, answer the user's query directly "
        "without calling a tool. DO NOT CALL A TOOL THAT IS IRRELEVANT "
        "to the user's question - just respond to it normally."
    },
    "llama": {
        "model":
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "arguments": [
            "--tool-call-parser", "llama3_json", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_llama3.1_json.jinja")
        ],
        "supports_parallel":
        False,
    },
    "llama3.2": {
        "model":
        "meta-llama/Llama-3.2-3B-Instruct",
        "arguments": [
            "--tool-call-parser", "llama3_json", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_llama3.2_json.jinja")
        ],
        "supports_parallel":
        False,
    },
    "mistral": {
        "model":
        "mistralai/Mistral-7B-Instruct-v0.3",
        "arguments": [
            "--tool-call-parser", "mistral", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_mistral.jinja"),
            # skip the merged-weights file when downloading
            "--ignore-patterns=\"consolidated.safetensors\""
        ],
        "system_prompt":
        "You are a helpful assistant with access to tools. If a tool"
        " that you have would be helpful to answer a user query, "
        "call the tool. Otherwise, answer the user's query directly "
        "without calling a tool. DO NOT CALL A TOOL THAT IS IRRELEVANT "
        "to the user's question - just respond to it normally."
    },
    "granite20b": {
        "model":
        "mbayser/granite-20b-functioncalling-FP8-KV",
        "arguments": [
            "--tool-call-parser", "granite-20b-fc", "--chat-template",
            str(VLLM_PATH /
                "examples/tool_chat_template_granite_20b_fc.jinja"),
            # large model: run one sequence at a time with CPU offload
            "--max_num_seqs", "1", "--enforce-eager", "--cpu-offload-gb", "20"
        ],
        "supports_parallel":
        False,
        "supports_rocm":
        False,
    },
    "granite8b": {
        "model":
        "ibm-granite/granite-3.0-8b-instruct",
        "arguments": [
            "--tool-call-parser", "granite", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_granite.jinja")
        ],
    },
    "internlm": {
        "model":
        "internlm/internlm2_5-7b-chat",
        "arguments": [
            "--tool-call-parser", "internlm", "--chat-template",
            str(VLLM_PATH /
                "examples/tool_chat_template_internlm2_tool.jinja"),
            "--trust_remote_code"
        ],
        "supports_parallel":
        False,
    },
    "toolACE": {
        "model":
        "Team-ACE/ToolACE-8B",
        "arguments": [
            "--tool-call-parser", "pythonic", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_toolace.jinja")
        ],
        "supports_parallel":
        True,
    },
}
|
||||
|
||||
# Tool schema for fetching the current weather; the primary tool the tests
# expect the model to call. Fix: the "state" description read "must the
# two-letter abbreviation" (garbled grammar in the prompt) -> "must be the".
WEATHER_TOOL: ChatCompletionToolParam = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type":
                    "string",
                    "description":
                    "The city to find the weather for, "
                    "e.g. 'San Francisco'"
                },
                "state": {
                    "type":
                    "string",
                    "description":
                    "must be the two-letter abbreviation for the state "
                    "that the city is in, e.g. 'CA' which would "
                    "mean 'California'"
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"]
                }
            }
        }
    }
}
|
||||
|
||||
# Tool schema for a web search; present so the model has an irrelevant tool
# it should NOT pick for weather questions. Fix: the implicitly-concatenated
# description strings were missing joining spaces, producing "likelyto",
# "shouldideally" and "not anatural-language" in the prompt text.
SEARCH_TOOL: ChatCompletionToolParam = {
    "type": "function",
    "function": {
        "name":
        "web_search",
        "description":
        "Search the internet and get a summary of the top "
        "10 webpages. Should only be used if you don't know "
        "the answer to a user query, and the results are likely "
        "to be able to be found with a web search",
        "parameters": {
            "type": "object",
            "properties": {
                "search_term": {
                    "type":
                    "string",
                    "description":
                    "The term to use in the search. This should "
                    "ideally be keywords to search for, not a "
                    "natural-language question"
                }
            },
            "required": ["search_term"]
        }
    }
}
|
||||
|
||||
# A short chit-chat conversation that should NOT trigger any tool call.
MESSAGES_WITHOUT_TOOLS: List[ChatCompletionMessageParam] = [{
    "role":
    "user",
    "content":
    "Hi! How are you?"
}, {
    "role":
    "assistant",
    "content":
    "I'm doing great! How can I assist you?"
}, {
    "role":
    "user",
    "content":
    "Can you tell me a joke please?"
}]
|
||||
|
||||
# A question that should make the model call WEATHER_TOOL exactly once.
MESSAGES_ASKING_FOR_TOOLS: List[ChatCompletionMessageParam] = [{
    "role":
    "user",
    "content":
    "What is the weather in Dallas, Texas in Fahrenheit?"
}]
|
||||
|
||||
# A full round-trip: user question, the assistant's tool call, and the tool's
# result. Tests feed this back and expect a plain-text answer mentioning "98"
# (see test_tool_call_with_results). Fix: the implicitly-concatenated tool
# content was missing a joining space, producing "partlycloudy skies".
MESSAGES_WITH_TOOL_RESPONSE: List[ChatCompletionMessageParam] = [{
    "role":
    "user",
    "content":
    "What is the weather in Dallas, Texas in Fahrenheit?"
}, {
    "role":
    "assistant",
    "tool_calls": [{
        "id": "chatcmpl-tool-03e6481b146e408e9523d9c956696295",
        "type": "function",
        "function": {
            "name":
            WEATHER_TOOL["function"]["name"],
            "arguments":
            '{"city": "Dallas", "state": "TX", '
            '"unit": "fahrenheit"}'
        }
    }]
}, {
    "role":
    "tool",
    "tool_call_id":
    "chatcmpl-tool-03e6481b146e408e9523d9c956696295",
    "content":
    "The weather in Dallas is 98 degrees fahrenheit, with partly "
    "cloudy skies and a low chance of rain."
}]
|
||||
|
||||
# A question that should make the model call WEATHER_TOOL twice in parallel
# (once per city), for models whose config sets supports_parallel.
MESSAGES_ASKING_FOR_PARALLEL_TOOLS: List[ChatCompletionMessageParam] = [{
    "role":
    "user",
    "content":
    "What is the weather in Dallas, Texas and Orlando, Florida in "
    "Fahrenheit?"
}]
|
||||
|
||||
# Parallel round-trip: user question, two assistant tool calls, and both tool
# results. Fix: the implicitly-concatenated Orlando result was missing a
# joining space, producing "clearskies".
# NOTE(review): the Orlando call uses state "Fl" (not "FL"); looks like a
# typo but left untouched in case a test compares against it — confirm.
MESSAGES_WITH_PARALLEL_TOOL_RESPONSE: List[ChatCompletionMessageParam] = [{
    "role":
    "user",
    "content":
    "What is the weather in Dallas, Texas and Orlando, Florida in "
    "Fahrenheit?"
}, {
    "role":
    "assistant",
    "tool_calls": [{
        "id": "chatcmpl-tool-03e6481b146e408e9523d9c956696295",
        "type": "function",
        "function": {
            "name":
            WEATHER_TOOL["function"]["name"],
            "arguments":
            '{"city": "Dallas", "state": "TX", '
            '"unit": "fahrenheit"}'
        }
    }, {
        "id": "chatcmpl-tool-d027061e1bd21cda48bee7da829c1f5b",
        "type": "function",
        "function": {
            "name":
            WEATHER_TOOL["function"]["name"],
            "arguments":
            '{"city": "Orlando", "state": "Fl", '
            '"unit": "fahrenheit"}'
        }
    }]
}, {
    "role":
    "tool",
    "tool_call_id":
    "chatcmpl-tool-03e6481b146e408e9523d9c956696295",
    "content":
    "The weather in Dallas TX is 98 degrees fahrenheit with mostly "
    "cloudy skies and a chance of rain in the evening."
}, {
    "role":
    "tool",
    "tool_call_id":
    "chatcmpl-tool-d027061e1bd21cda48bee7da829c1f5b",
    "content":
    "The weather in Orlando FL is 78 degrees fahrenheit with clear "
    "skies."
}]
|
||||
Reference in New Issue
Block a user