Fix regex docs (#1909)
This commit is contained in:
@@ -87,7 +87,6 @@
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful AI assistant\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"List 3 countries and their capitals.\"},\n",
|
||||
" ],\n",
|
||||
" temperature=0,\n",
|
||||
@@ -184,7 +183,6 @@
|
||||
"## Completions\n",
|
||||
"\n",
|
||||
"### Usage\n",
|
||||
"\n",
|
||||
"Completions API is similar to Chat Completions API, but without the `messages` parameter or chat templates."
|
||||
]
|
||||
},
|
||||
@@ -253,6 +251,77 @@
|
||||
"print_highlight(f\"Response: {response}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Structured decoding (JSON, Regex)\n",
|
||||
"You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.\n",
|
||||
"\n",
|
||||
"### JSON"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"json_schema = json.dumps(\n",
|
||||
" {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"name\": {\"type\": \"string\", \"pattern\": \"^[\\\\w]+$\"},\n",
|
||||
" \"population\": {\"type\": \"integer\"},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"name\", \"population\"],\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n",
|
||||
" ],\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=128,\n",
|
||||
" response_format={\n",
|
||||
" \"type\": \"json_schema\",\n",
|
||||
" \"json_schema\": {\"name\": \"foo\", \"schema\": json.loads(json_schema)},\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print_highlight(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Regular expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n",
|
||||
" ],\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=128,\n",
|
||||
" extra_body={\"regex\": \"(Paris|London)\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print_highlight(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -177,3 +177,50 @@ print(response.json())
|
||||
|
||||
The `image_data` can be a file name, a URL, or a base64 encoded string. See also `python/sglang/srt/utils.py:load_image`.
|
||||
Streaming is supported in a similar manner as [above](#streaming).
|
||||
|
||||
### Structured decoding (JSON, Regex)
|
||||
You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.
|
||||
|
||||
```python
|
||||
import json
|
||||
import requests
|
||||
|
||||
json_schema = json.dumps(
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string", "pattern": "^[\\w]+$"},
|
||||
"population": {"type": "integer"},
|
||||
},
|
||||
"required": ["name", "population"],
|
||||
}
|
||||
)
|
||||
|
||||
# JSON
|
||||
response = requests.post(
|
||||
"http://localhost:30000/generate",
|
||||
json={
|
||||
"text": "Here is the information of the capital of France in the JSON format.\n",
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 64,
|
||||
"json_schema": json_schema,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json())
|
||||
|
||||
# Regular expression
|
||||
response = requests.post(
|
||||
"http://localhost:30000/generate",
|
||||
json={
|
||||
"text": "Paris is the capital of",
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 64,
|
||||
"regex": "(France|England)",
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json())
|
||||
```
|
||||
Reference in New Issue
Block a user