diff --git a/docs/backend/structured_outputs.ipynb b/docs/backend/structured_outputs.ipynb
index 94e8902d6..3d1d0c15a 100644
--- a/docs/backend/structured_outputs.ipynb
+++ b/docs/backend/structured_outputs.ipynb
@@ -15,15 +15,15 @@
"\n",
"SGLang supports two grammar backends:\n",
"\n",
- "- [Outlines](https://github.com/dottxt-ai/outlines) (default): Supports JSON schema and regular expression constraints.\n",
- "- [XGrammar](https://github.com/mlc-ai/xgrammar): Supports JSON schema, regular expression, and EBNF constraints.\n",
+ "- [Outlines](https://github.com/dottxt-ai/outlines): Supports JSON schema and regular expression constraints.\n",
+ "- [XGrammar](https://github.com/mlc-ai/xgrammar) (default): Supports JSON schema, regular expression, and EBNF constraints.\n",
"- [Llguidance](https://github.com/guidance-ai/llguidance): Supports JSON schema, regular expression, and EBNF constraints.\n",
"\n",
"We suggest using XGrammar for its better performance and utility. XGrammar currently uses the [GGML BNF format](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md). For more details, see [XGrammar technical overview](https://blog.mlc.ai/2024/11/22/achieving-efficient-flexible-portable-structured-generation-with-xgrammar).\n",
"\n",
- "To use Xgrammar, simply add `--grammar-backend xgrammar` when launching the server.\n",
+ "To use Outlines, simply add `--grammar-backend outlines` when launching the server.\n",
"To use llguidance, add `--grammar-backend llguidance` when launching the server.\n",
- "If no backend is specified, Outlines will be used as the default.\n",
+ "If no backend is specified, XGrammar will be used as the default.\n",
"\n",
"For better output quality, **It's advisable to explicitly include instructions in the prompt to guide the model to generate the desired format.** For example, you can specify, 'Please generate the output in the following JSON format: ...'.\n"
]
@@ -56,7 +56,7 @@
"\n",
"\n",
"server_process, port = launch_server_cmd(\n",
- " \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0 --grammar-backend xgrammar\"\n",
+ " \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")\n",
@@ -229,6 +229,131 @@
"print_highlight(response.choices[0].message.content)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Structural Tag"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tool_get_current_weather = {\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\n",
+ " \"name\": \"get_current_weather\",\n",
+ " \"description\": \"Get the current weather in a given location\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"city\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The city to find the weather for, e.g. 'San Francisco'\",\n",
+ " },\n",
+ " \"state\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"the two-letter abbreviation for the state that the city is\"\n",
+ " \" in, e.g. 'CA' which would mean 'California'\",\n",
+ " },\n",
+ " \"unit\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The unit to fetch the temperature in\",\n",
+ " \"enum\": [\"celsius\", \"fahrenheit\"],\n",
+ " },\n",
+ " },\n",
+ " \"required\": [\"city\", \"state\", \"unit\"],\n",
+ " },\n",
+ " },\n",
+ "}\n",
+ "\n",
+ "tool_get_current_date = {\n",
+ " \"type\": \"function\",\n",
+ " \"function\": {\n",
+ " \"name\": \"get_current_date\",\n",
+ " \"description\": \"Get the current date and time for a given timezone\",\n",
+ " \"parameters\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"timezone\": {\n",
+ " \"type\": \"string\",\n",
+ " \"description\": \"The timezone to fetch the current date and time for, e.g. 'America/New_York'\",\n",
+ " }\n",
+ " },\n",
+ " \"required\": [\"timezone\"],\n",
+ " },\n",
+ " },\n",
+ "}\n",
+ "\n",
+ "schema_get_current_weather = tool_get_current_weather[\"function\"][\"parameters\"]\n",
+ "schema_get_current_date = tool_get_current_date[\"function\"][\"parameters\"]\n",
+ "\n",
+ "\n",
+ "def get_messages():\n",
+ " return [\n",
+ " {\n",
+ " \"role\": \"system\",\n",
+ " \"content\": f\"\"\"\n",
+ "# Tool Instructions\n",
+ "- Always execute python code in messages that you share.\n",
+ "- When looking for real time information use relevant functions if available else fallback to brave_search\n",
+ "You have access to the following functions:\n",
+ "Use the function 'get_current_weather' to: Get the current weather in a given location\n",
+ "{tool_get_current_weather[\"function\"]}\n",
+ "Use the function 'get_current_date' to: Get the current date and time for a given timezone\n",
+ "{tool_get_current_date[\"function\"]}\n",
+ "If you choose to call a function ONLY reply in the following format:\n",
+ "<{{start_tag}}={{function_name}}>{{parameters}}{{end_tag}}\n",
+ "where\n",
+ "start_tag => `<function`\n",
+ "parameters => a JSON dict with the function argument name as key and function argument value as value.\n",
+ "end_tag => `</function>`\n",
+ "Here is an example,\n",
+ "<function=example_function_name>{{\"example_name\": \"example_value\"}}</function>\n",
+ "Reminder:\n",
+ "- Function calls MUST follow the specified format\n",
+ "- Required parameters MUST be specified\n",
+ "- Only call one function at a time\n",
+ "- Put the entire function call reply on one line\n",
+ "- Always add your sources when using search results to answer the user query\n",
+ "You are a helpful assistant.\"\"\",\n",
+ " },\n",
+ " {\n",
+ " \"role\": \"user\",\n",
+ " \"content\": \"You are in New York. Please get the current date and time, and the weather.\",\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ "\n",
+ "messages = get_messages()\n",
+ "\n",
+ "response = client.chat.completions.create(\n",
+ " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+ " messages=messages,\n",
+ " response_format={\n",
+ " \"type\": \"structural_tag\",\n",
+ " \"structures\": [\n",
+ " {\n",
+ " \"begin\": \"\",\n",
+ " \"schema\": schema_get_current_weather,\n",
+ " \"end\": \"\",\n",
+ " },\n",
+ " {\n",
+ " \"begin\": \"\",\n",
+ " \"schema\": schema_get_current_date,\n",
+ " \"end\": \"\",\n",
+ " },\n",
+ " ],\n",
+ " \"triggers\": [\"\",\n",
+ " \"schema\": schema_get_current_weather,\n",
+ " \"end\": \"\",\n",
+ " },\n",
+ " {\n",
+ " \"begin\": \"\",\n",
+ " \"schema\": schema_get_current_date,\n",
+ " \"end\": \"\",\n",
+ " },\n",
+ " ],\n",
+ " \"triggers\": [\"\",\n",
+ " \"schema\": schema_get_current_weather,\n",
+ " \"end\": \"\",\n",
+ " },\n",
+ " {\n",
+ " \"begin\": \"\",\n",
+ " \"schema\": schema_get_current_date,\n",
+ " \"end\": \"\",\n",
+ " },\n",
+ " ],\n",
+ " \"triggers\": [\"