diff --git a/docs/backend/openai_api_completions.ipynb b/docs/backend/openai_api_completions.ipynb index 2f4b988d5..e39b328f1 100644 --- a/docs/backend/openai_api_completions.ipynb +++ b/docs/backend/openai_api_completions.ipynb @@ -87,7 +87,6 @@ "response = client.chat.completions.create(\n", " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", " messages=[\n", - " {\"role\": \"system\", \"content\": \"You are a helpful AI assistant\"},\n", " {\"role\": \"user\", \"content\": \"List 3 countries and their capitals.\"},\n", " ],\n", " temperature=0,\n", @@ -184,7 +183,6 @@ "## Completions\n", "\n", "### Usage\n", - "\n", "Completions API is similar to Chat Completions API, but without the `messages` parameter or chat templates." ] }, @@ -253,6 +251,77 @@ "print_highlight(f\"Response: {response}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Structured decoding (JSON, Regex)\n", + "You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.\n", + "\n", + "### JSON" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "json_schema = json.dumps(\n", + " {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"name\": {\"type\": \"string\", \"pattern\": \"^[\\\\w]+$\"},\n", + " \"population\": {\"type\": \"integer\"},\n", + " },\n", + " \"required\": [\"name\", \"population\"],\n", + " }\n", + ")\n", + "\n", + "response = client.chat.completions.create(\n", + " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n", + " ],\n", + " temperature=0,\n", + " max_tokens=128,\n", + " response_format={\n", + " \"type\": \"json_schema\",\n", + " \"json_schema\": {\"name\": \"foo\", \"schema\": json.loads(json_schema)},\n", + " },\n", + ")\n", + "\n", + "print_highlight(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regular expression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n", + " ],\n", + " temperature=0,\n", + " max_tokens=128,\n", + " extra_body={\"regex\": \"(Paris|London)\"},\n", + ")\n", + "\n", + "print_highlight(response.choices[0].message.content)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md index 21850e935..333b8fcd1 100644 --- a/docs/references/sampling_params.md +++ b/docs/references/sampling_params.md @@ -177,3 +177,50 @@ print(response.json()) The `image_data` can be a file name, a URL, or a base64 encoded string. See also `python/sglang/srt/utils.py:load_image`. Streaming is supported in a similar manner as [above](#streaming). + +### Structured decoding (JSON, Regex) +You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints. + +```python +import json +import requests + +json_schema = json.dumps( + { + "type": "object", + "properties": { + "name": {"type": "string", "pattern": "^[\\w]+$"}, + "population": {"type": "integer"}, + }, + "required": ["name", "population"], + } +) + +# JSON +response = requests.post( + "http://localhost:30000/generate", + json={ + "text": "Here is the information of the capital of France in the JSON format.\n", + "sampling_params": { + "temperature": 0, + "max_new_tokens": 64, + "json_schema": json_schema, + }, + }, +) +print(response.json()) + +# Regular expression +response = requests.post( + "http://localhost:30000/generate", + json={ + "text": "Paris is the capital of", + "sampling_params": { + "temperature": 0, + "max_new_tokens": 64, + "regex": "(France|England)", + }, + }, +) +print(response.json()) +``` \ No newline at end of file