From 452db508089722f0002a72e65c87c0ddd2f8fb53 Mon Sep 17 00:00:00 2001 From: mlmz <54172054+minleminzui@users.noreply.github.com> Date: Mon, 17 Mar 2025 09:53:43 +0800 Subject: [PATCH] Constraint Decoding: Set xgrammar as the default grammar backend (#4386) --- docs/backend/structured_outputs.ipynb | 238 ++++++++++++++++++++++- python/pyproject.toml | 2 +- python/sglang/srt/openai_api/protocol.py | 11 +- python/sglang/srt/server_args.py | 2 +- 4 files changed, 243 insertions(+), 10 deletions(-) diff --git a/docs/backend/structured_outputs.ipynb b/docs/backend/structured_outputs.ipynb index 94e8902d6..3d1d0c15a 100644 --- a/docs/backend/structured_outputs.ipynb +++ b/docs/backend/structured_outputs.ipynb @@ -15,15 +15,15 @@ "\n", "SGLang supports two grammar backends:\n", "\n", - "- [Outlines](https://github.com/dottxt-ai/outlines) (default): Supports JSON schema and regular expression constraints.\n", - "- [XGrammar](https://github.com/mlc-ai/xgrammar): Supports JSON schema, regular expression, and EBNF constraints.\n", + "- [Outlines](https://github.com/dottxt-ai/outlines): Supports JSON schema and regular expression constraints.\n", + "- [XGrammar](https://github.com/mlc-ai/xgrammar)(default): Supports JSON schema, regular expression, and EBNF constraints.\n", "- [Llguidance](https://github.com/guidance-ai/llguidance): Supports JSON schema, regular expression, and EBNF constraints.\n", "\n", "We suggest using XGrammar for its better performance and utility. XGrammar currently uses the [GGML BNF format](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md). For more details, see [XGrammar technical overview](https://blog.mlc.ai/2024/11/22/achieving-efficient-flexible-portable-structured-generation-with-xgrammar).\n", "\n", - "To use Xgrammar, simply add `--grammar-backend xgrammar` when launching the server.\n", + "To use Outlines, simply add `--grammar-backend outlines` when launching the server.\n", "To use llguidance, add `--grammar-backend llguidance` when launching the server.\n", - "If no backend is specified, Outlines will be used as the default.\n", + "If no backend is specified, XGrammar will be used as the default.\n", "\n", "For better output quality, **It's advisable to explicitly include instructions in the prompt to guide the model to generate the desired format.** For example, you can specify, 'Please generate the output in the following JSON format: ...'.\n" ] @@ -56,7 +56,7 @@ "\n", "\n", "server_process, port = launch_server_cmd(\n", - " \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0 --grammar-backend xgrammar\"\n", + " \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0\"\n", ")\n", "\n", "wait_for_server(f\"http://localhost:{port}\")\n", @@ -229,6 +229,131 @@ "print_highlight(response.choices[0].message.content)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Structural Tag" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tool_get_current_weather = {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_weather\",\n", + " \"description\": \"Get the current weather in a given location\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"city\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city to find the weather for, e.g. 'San Francisco'\",\n", + " },\n", + " \"state\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"the two-letter abbreviation for the state that the city is\"\n", + " \" in, e.g. 'CA' which would mean 'California'\",\n", + " },\n", + " \"unit\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The unit to fetch the temperature in\",\n", + " \"enum\": [\"celsius\", \"fahrenheit\"],\n", + " },\n", + " },\n", + " \"required\": [\"city\", \"state\", \"unit\"],\n", + " },\n", + " },\n", + "}\n", + "\n", + "tool_get_current_date = {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_date\",\n", + " \"description\": \"Get the current date and time for a given timezone\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"timezone\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The timezone to fetch the current date and time for, e.g. 'America/New_York'\",\n", + " }\n", + " },\n", + " \"required\": [\"timezone\"],\n", + " },\n", + " },\n", + "}\n", + "\n", + "schema_get_current_weather = tool_get_current_weather[\"function\"][\"parameters\"]\n", + "schema_get_current_date = tool_get_current_date[\"function\"][\"parameters\"]\n", + "\n", + "\n", + "def get_messages():\n", + " return [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": f\"\"\"\n", + "# Tool Instructions\n", + "- Always execute python code in messages that you share.\n", + "- When looking for real time information use relevant functions if available else fallback to brave_search\n", + "You have access to the following functions:\n", + "Use the function 'get_current_weather' to: Get the current weather in a given location\n", + "{tool_get_current_weather[\"function\"]}\n", + "Use the function 'get_current_date' to: Get the current date and time for a given timezone\n", + "{tool_get_current_date[\"function\"]}\n", + "If a you choose to call a function ONLY reply in the following format:\n", + "<{{start_tag}}={{function_name}}>{{parameters}}{{end_tag}}\n", + "where\n", + "start_tag => ` a JSON dict with the function argument name as key and function argument value as value.\n", + "end_tag => ``\n", + "Here is an example,\n", + "{{\"example_name\": \"example_value\"}}\n", + "Reminder:\n", + "- Function calls MUST follow the specified format\n", + "- Required parameters MUST be specified\n", + "- Only call one function at a time\n", + "- Put the entire function call reply on one line\n", + "- Always add your sources when using search results to answer the user query\n", + "You are a helpful assistant.\"\"\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"You are in New York. Please get the current date and time, and the weather.\",\n", + " },\n", + " ]\n", + "\n", + "\n", + "messages = get_messages()\n", + "\n", + "response = client.chat.completions.create(\n", + " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", + " messages=messages,\n", + " response_format={\n", + " \"type\": \"structural_tag\",\n", + " \"structures\": [\n", + " {\n", + " \"begin\": \"\",\n", + " \"schema\": schema_get_current_weather,\n", + " \"end\": \"\",\n", + " },\n", + " {\n", + " \"begin\": \"\",\n", + " \"schema\": schema_get_current_date,\n", + " \"end\": \"\",\n", + " },\n", + " ],\n", + " \"triggers\": [\"\",\n", + " \"schema\": schema_get_current_weather,\n", + " \"end\": \"\",\n", + " },\n", + " {\n", + " \"begin\": \"\",\n", + " \"schema\": schema_get_current_date,\n", + " \"end\": \"\",\n", + " },\n", + " ],\n", + " \"triggers\": [\"\",\n", + " \"schema\": schema_get_current_weather,\n", + " \"end\": \"\",\n", + " },\n", + " {\n", + " \"begin\": \"\",\n", + " \"schema\": schema_get_current_date,\n", + " \"end\": \"\",\n", + " },\n", + " ],\n", + " \"triggers\": [\"