feat(Tool Calling): Support required and specific function mode (#6550)
This commit is contained in:
@@ -54,10 +54,12 @@
|
||||
"source": [
|
||||
"Note that `--tool-call-parser` defines the parser used to interpret responses. Currently supported parsers include:\n",
|
||||
"\n",
|
||||
"- llama3: Llama 3.1 / 3.2 (e.g. meta-llama/Llama-3.1-8B-Instruct, meta-llama/Llama-3.2-1B-Instruct).\n",
|
||||
"- llama3: Llama 3.1 / 3.2 / 3.3 (e.g. meta-llama/Llama-3.1-8B-Instruct, meta-llama/Llama-3.2-1B-Instruct, meta-llama/Llama-3.3-70B-Instruct).\n",
|
||||
"- llama4: Llama 4 (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct).\n",
|
||||
"- mistral: Mistral (e.g. mistralai/Mistral-7B-Instruct-v0.3, mistralai/Mistral-Nemo-Instruct-2407, mistralai/\n",
|
||||
"Mistral-7B-v0.3).\n",
|
||||
"- qwen25: Qwen 2.5 (e.g. Qwen/Qwen2.5-1.5B-Instruct, Qwen/Qwen2.5-7B-Instruct) and QwQ (i.e. Qwen/QwQ-32B). Especially, for QwQ, we can enable the reasoning parser together with tool call parser, details about reasoning parser can be found in [reasoning parser](https://docs.sglang.ai/backend/separate_reasoning.html)."
|
||||
"- qwen25: Qwen 2.5 (e.g. Qwen/Qwen2.5-1.5B-Instruct, Qwen/Qwen2.5-7B-Instruct) and QwQ (i.e. Qwen/QwQ-32B). Especially, for QwQ, we can enable the reasoning parser together with tool call parser, details about reasoning parser can be found in [reasoning parser](https://docs.sglang.ai/backend/separate_reasoning.html).\n",
|
||||
"- deepseekv3: DeepSeek-v3 (e.g., deepseek-ai/DeepSeek-V3-0324).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -360,6 +362,164 @@
|
||||
"print(final_response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Choice Mode\n",
|
||||
"\n",
|
||||
"SGLang supports OpenAI's `tool_choice` parameter to control when and which tools the model should call. This feature is implemented using EBNF (Extended Backus-Naur Form) grammar to ensure reliable tool calling behavior.\n",
|
||||
"\n",
|
||||
"### Supported Tool Choice Options\n",
|
||||
"\n",
|
||||
"- **`tool_choice=\"required\"`**: Forces the model to call at least one tool\n",
|
||||
"- **`tool_choice={\"type\": \"function\", \"function\": {\"name\": \"specific_function\"}}`**: Forces the model to call a specific function\n",
|
||||
"\n",
|
||||
"### Backend Compatibility\n",
|
||||
"\n",
|
||||
"Tool choice is fully supported with the **Xgrammar backend**, which is the default grammar backend (`--grammar-backend xgrammar`). However, it may not be fully supported with other backends such as `outlines`.\n",
|
||||
"\n",
|
||||
"### Example: Required Tool Choice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Response with tool_choice='required':\n",
|
||||
"Content: None\n",
|
||||
"Tool calls: [ChatCompletionMessageToolCall(id='call_NFO3TSZuRRO8Eu3Cv79uiQ', function=Function(arguments='{\"city\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function', index=0)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from openai import OpenAI\n",
|
||||
"import json\n",
|
||||
"from sglang.utils import wait_for_server, print_highlight, terminate_process\n",
|
||||
"from sglang.test.test_utils import is_in_ci\n",
|
||||
"\n",
|
||||
"if is_in_ci():\n",
|
||||
" from patch import launch_server_cmd\n",
|
||||
"else:\n",
|
||||
" from sglang.utils import launch_server_cmd\n",
|
||||
" import nest_asyncio\n",
|
||||
"\n",
|
||||
" nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"# Start a new server session for tool choice examples\n",
|
||||
"server_process_tool_choice, port_tool_choice = launch_server_cmd(\n",
|
||||
" \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0\"\n",
|
||||
")\n",
|
||||
"wait_for_server(f\"http://localhost:{port_tool_choice}\")\n",
|
||||
"\n",
|
||||
"# Initialize client for tool choice examples\n",
|
||||
"client_tool_choice = OpenAI(\n",
|
||||
" api_key=\"None\", base_url=f\"http://0.0.0.0:{port_tool_choice}/v1\"\n",
|
||||
")\n",
|
||||
"model_name_tool_choice = client_tool_choice.models.list().data[0].id\n",
|
||||
"\n",
|
||||
"# Example with tool_choice=\"required\" - forces the model to call a tool\n",
|
||||
"messages_required = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"Hello, what is the capital of France?\"}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Define tools\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"city\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city to find the weather for, e.g. 'San Francisco'\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The unit to fetch the temperature in\",\n",
|
||||
" \"enum\": [\"celsius\", \"fahrenheit\"],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" \"required\": [\"city\", \"unit\"],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response_required = client_tool_choice.chat.completions.create(\n",
|
||||
" model=model_name_tool_choice,\n",
|
||||
" messages=messages_required,\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=1024,\n",
|
||||
" tools=tools,\n",
|
||||
" tool_choice=\"required\", # Force the model to call a tool\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print_highlight(\"Response with tool_choice='required':\")\n",
|
||||
"print(\"Content:\", response_required.choices[0].message.content)\n",
|
||||
"print(\"Tool calls:\", response_required.choices[0].message.tool_calls)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Specific Function Choice\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Response with specific function choice:\n",
|
||||
"Content: None\n",
|
||||
"Tool calls: [ChatCompletionMessageToolCall(id='call_fGL_1qsPQFqntNBPkSynJw', function=Function(arguments='{\"city\": \"Sophia Antipolis\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function', index=0)]\n",
|
||||
"Called function: get_current_weather\n",
|
||||
"Arguments: {\"city\": \"Sophia Antipolis\", \"unit\": \"celsius\"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Example with specific function choice - forces the model to call a specific function\n",
|
||||
"messages_specific = [\n",
|
||||
"    {\"role\": \"user\", \"content\": \"What are the most attractive places in France?\"}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response_specific = client_tool_choice.chat.completions.create(\n",
|
||||
" model=model_name_tool_choice,\n",
|
||||
" messages=messages_specific,\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=1024,\n",
|
||||
" tools=tools,\n",
|
||||
" tool_choice={\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\"name\": \"get_current_weather\"},\n",
|
||||
" }, # Force the model to call the specific get_current_weather function\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print_highlight(\"Response with specific function choice:\")\n",
|
||||
"print(\"Content:\", response_specific.choices[0].message.content)\n",
|
||||
"print(\"Tool calls:\", response_specific.choices[0].message.tool_calls)\n",
|
||||
"\n",
|
||||
"if response_specific.choices[0].message.tool_calls:\n",
|
||||
" tool_call = response_specific.choices[0].message.tool_calls[0]\n",
|
||||
" print(f\"Called function: {tool_call.function.name}\")\n",
|
||||
" print(f\"Arguments: {tool_call.function.arguments}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -444,7 +604,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sglang as sgl\n",
|
||||
"from sglang.srt.function_call_parser import FunctionCallParser\n",
|
||||
"from sglang.srt.function_call.function_call_parser import FunctionCallParser\n",
|
||||
"from sglang.srt.managers.io_struct import Tool, Function\n",
|
||||
"\n",
|
||||
"llm = sgl.Engine(model_path=\"Qwen/Qwen2.5-7B-Instruct\")\n",
|
||||
|
||||
Reference in New Issue
Block a user