diff --git a/docs/backend/openai_api_completions.ipynb b/docs/backend/openai_api_completions.ipynb
index 2f4b988d5..e39b328f1 100644
--- a/docs/backend/openai_api_completions.ipynb
+++ b/docs/backend/openai_api_completions.ipynb
@@ -87,7 +87,6 @@
     "response = client.chat.completions.create(\n",
     "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
     "    messages=[\n",
-    "        {\"role\": \"system\", \"content\": \"You are a helpful AI assistant\"},\n",
     "        {\"role\": \"user\", \"content\": \"List 3 countries and their capitals.\"},\n",
     "    ],\n",
     "    temperature=0,\n",
@@ -184,7 +183,6 @@
     "## Completions\n",
     "\n",
     "### Usage\n",
-    "\n",
     "Completions API is similar to Chat Completions API, but without the `messages` parameter or chat templates."
    ]
   },
@@ -253,6 +251,77 @@
     "print_highlight(f\"Response: {response}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Structured decoding (JSON, Regex)\n",
+    "You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.\n",
+    "\n",
+    "### JSON"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "json_schema = json.dumps(\n",
+    "    {\n",
+    "        \"type\": \"object\",\n",
+    "        \"properties\": {\n",
+    "            \"name\": {\"type\": \"string\", \"pattern\": \"^[\\\\w]+$\"},\n",
+    "            \"population\": {\"type\": \"integer\"},\n",
+    "        },\n",
+    "        \"required\": [\"name\", \"population\"],\n",
+    "    }\n",
+    ")\n",
+    "\n",
+    "response = client.chat.completions.create(\n",
+    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    messages=[\n",
+    "        {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n",
+    "    ],\n",
+    "    temperature=0,\n",
+    "    max_tokens=128,\n",
+    "    response_format={\n",
+    "        \"type\": \"json_schema\",\n",
+    "        \"json_schema\": {\"name\": \"foo\", \"schema\": json.loads(json_schema)},\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "print_highlight(response.choices[0].message.content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Regular expression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = client.chat.completions.create(\n",
+    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    messages=[\n",
+    "        {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n",
+    "    ],\n",
+    "    temperature=0,\n",
+    "    max_tokens=128,\n",
+    "    extra_body={\"regex\": \"(Paris|London)\"},\n",
+    ")\n",
+    "\n",
+    "print_highlight(response.choices[0].message.content)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index 21850e935..333b8fcd1 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -177,3 +177,50 @@ print(response.json())
 
 The `image_data` can be a file name, a URL, or a base64 encoded string. See also `python/sglang/srt/utils.py:load_image`.
 Streaming is supported in a similar manner as [above](#streaming).
+
+### Structured decoding (JSON, Regex)
+You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.
+
+```python
+import json
+import requests
+
+json_schema = json.dumps(
+    {
+        "type": "object",
+        "properties": {
+            "name": {"type": "string", "pattern": "^[\\w]+$"},
+            "population": {"type": "integer"},
+        },
+        "required": ["name", "population"],
+    }
+)
+
+# JSON
+response = requests.post(
+    "http://localhost:30000/generate",
+    json={
+        "text": "Here is the information of the capital of France in the JSON format.\n",
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 64,
+            "json_schema": json_schema,
+        },
+    },
+)
+print(response.json())
+
+# Regular expression
+response = requests.post(
+    "http://localhost:30000/generate",
+    json={
+        "text": "Paris is the capital of",
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 64,
+            "regex": "(France|England)",
+        },
+    },
+)
+print(response.json())
+```
\ No newline at end of file