[Docs] Add Support for Pydantic Structured Output Format (#2697)

2025-01-01 23:08:43 +00:00
parent b6e0cfb5e1
commit 062c48d2bd
4 changed files with 174 additions and 122 deletions
--- a/docs/backend/structured_outputs.ipynb
+++ b/docs/backend/structured_outputs.ipynb
@@ -16,16 +16,11 @@
    "SGLang supports two grammar backends:\n",
    "\n",
    "- [Outlines](https://github.com/dottxt-ai/outlines) (default): Supports JSON schema and regular expression constraints.\n",
-    "- [XGrammar](https://github.com/mlc-ai/xgrammar): Supports JSON schema and EBNF constraints.\n",
-    "  - XGrammar currently uses the [GGML BNF format](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md)\n",
+    "- [XGrammar](https://github.com/mlc-ai/xgrammar): Supports JSON schema and EBNF constraints. It currently uses the [GGML BNF format](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md).\n",
    "\n",
-    "Initialize the XGrammar backend using `--grammar-backend xgrammar` flag\n",
-    "```bash\n",
-    "python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
-    "--port 30000 --host 0.0.0.0 --grammar-backend [xgrammar|outlines] # xgrammar or outlines (default: outlines)\n",
-    "```\n",
+    "We suggest using XGrammar whenever possible for its better performance. For more details, see [XGrammar technical overview](https://blog.mlc.ai/2024/11/22/achieving-efficient-flexible-portable-structured-generation-with-xgrammar).\n",
    "\n",
-    "We suggest using XGrammar whenever possible for its better performance. For more details, see [XGrammar technical overview](https://blog.mlc.ai/2024/11/22/achieving-efficient-flexible-portable-structured-generation-with-xgrammar)."
+    "To use XGrammar, simply add `--grammar-backend xgrammar` when launching the server. If no backend is specified, Outlines will be used as the default."
   ]
  },
  {
@@ -35,13 +30,6 @@
    "## OpenAI Compatible API"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To use Xgrammar, simply add `--grammar-backend xgrammar` when launching the server. If no backend is specified, Outlines will be used as the default."
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -68,7 +56,64 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "### JSON"
+    "### JSON\n",
+    "\n",
+    "You can directly define a JSON schema or use [Pydantic](https://docs.pydantic.dev/latest/) to define and validate the response."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Using Pydantic**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pydantic import BaseModel, Field\n",
+    "\n",
+    "\n",
+    "# Define the schema using Pydantic\n",
+    "class CapitalInfo(BaseModel):\n",
+    "    name: str = Field(..., pattern=r\"^\\w+$\", description=\"Name of the capital city\")\n",
+    "    population: int = Field(..., description=\"Population of the capital city\")\n",
+    "\n",
+    "\n",
+    "response = client.chat.completions.create(\n",
+    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    messages=[\n",
+    "        {\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": \"Give me the information of the capital of France in the JSON format.\",\n",
+    "        },\n",
+    "    ],\n",
+    "    temperature=0,\n",
+    "    max_tokens=128,\n",
+    "    response_format={\n",
+    "        \"type\": \"json_schema\",\n",
+    "        \"json_schema\": {\n",
+    "            \"name\": \"foo\",\n",
+    "            # convert the pydantic model to json schema\n",
+    "            \"schema\": CapitalInfo.model_json_schema(),\n",
+    "        },\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "response_content = response.choices[0].message.content\n",
+    "# validate the JSON response by the pydantic model\n",
+    "capital_info = CapitalInfo.model_validate_json(response_content)\n",
+    "print_highlight(f\"Validated response: {capital_info.model_dump_json()}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**JSON Schema Directly**"
   ]
  },
  {
@@ -225,15 +270,64 @@
    "### JSON"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Using Pydantic**"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
-    "import json\n",
    "import requests\n",
+    "import json\n",
+    "from pydantic import BaseModel, Field\n",
    "\n",
+    "\n",
+    "# Define the schema using Pydantic\n",
+    "class CapitalInfo(BaseModel):\n",
+    "    name: str = Field(..., pattern=r\"^\\w+$\", description=\"Name of the capital city\")\n",
+    "    population: int = Field(..., description=\"Population of the capital city\")\n",
+    "\n",
+    "\n",
+    "# Make API request\n",
+    "response = requests.post(\n",
+    "    \"http://localhost:30010/generate\",\n",
+    "    json={\n",
+    "        \"text\": \"Here is the information of the capital of France in the JSON format.\\n\",\n",
+    "        \"sampling_params\": {\n",
+    "            \"temperature\": 0,\n",
+    "            \"max_new_tokens\": 64,\n",
+    "            \"json_schema\": json.dumps(CapitalInfo.model_json_schema()),\n",
+    "        },\n",
+    "    },\n",
+    ")\n",
+    "print_highlight(response.json())\n",
+    "\n",
+    "\n",
+    "response_data = json.loads(response.json()[\"text\"])\n",
+    "# validate the response by the pydantic model\n",
+    "capital_info = CapitalInfo.model_validate(response_data)\n",
+    "print_highlight(f\"Validated response: {capital_info.model_dump_json()}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**JSON Schema Directly**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
    "json_schema = json.dumps(\n",
    "    {\n",
    "        \"type\": \"object\",\n",
@@ -379,6 +473,13 @@
    "### JSON"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Using Pydantic**"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -386,7 +487,49 @@
   "outputs": [],
   "source": [
    "import json\n",
+    "from pydantic import BaseModel, Field\n",
    "\n",
+    "\n",
+    "prompts = [\n",
+    "    \"Give me the information of the capital of China in the JSON format.\",\n",
+    "    \"Give me the information of the capital of France in the JSON format.\",\n",
+    "    \"Give me the information of the capital of Ireland in the JSON format.\",\n",
+    "]\n",
+    "\n",
+    "\n",
+    "# Define the schema using Pydantic\n",
+    "class CapitalInfo(BaseModel):\n",
+    "    name: str = Field(..., pattern=r\"^\\w+$\", description=\"Name of the capital city\")\n",
+    "    population: int = Field(..., description=\"Population of the capital city\")\n",
+    "\n",
+    "\n",
+    "sampling_params = {\n",
+    "    \"temperature\": 0.1,\n",
+    "    \"top_p\": 0.95,\n",
+    "    \"json_schema\": json.dumps(CapitalInfo.model_json_schema()),\n",
+    "}\n",
+    "\n",
+    "outputs = llm_xgrammar.generate(prompts, sampling_params)\n",
+    "for prompt, output in zip(prompts, outputs):\n",
+    "    print_highlight(\"===============================\")\n",
+    "    print_highlight(f\"Prompt: {prompt}\")\n",
+    "    capital_info = CapitalInfo.model_validate_json(output[\"text\"])  # validate the output by the pydantic model\n",
+    "    print_highlight(f\"Validated output: {capital_info.model_dump_json()}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**JSON Schema Directly**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
    "prompts = [\n",
    "    \"Give me the information of the capital of China in the JSON format.\",\n",
    "    \"Give me the information of the capital of France in the JSON format.\",\n",