Fix regex docs (#1909)

2024-11-03 14:18:16 -08:00
parent 65859754f1
commit 1853c3523b
2 changed files with 118 additions and 2 deletions
--- a/docs/backend/openai_api_completions.ipynb
+++ b/docs/backend/openai_api_completions.ipynb
@@ -87,7 +87,6 @@
    "response = client.chat.completions.create(\n",
    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": \"You are a helpful AI assistant\"},\n",
    "        {\"role\": \"user\", \"content\": \"List 3 countries and their capitals.\"},\n",
    "    ],\n",
    "    temperature=0,\n",
@@ -184,7 +183,6 @@
    "## Completions\n",
    "\n",
    "### Usage\n",
    "\n",
    "Completions API is similar to Chat Completions API, but without the `messages` parameter or chat templates."
   ]
  },
@@ -253,6 +251,77 @@
    "print_highlight(f\"Response: {response}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Structured decoding (JSON, Regex)\n",
    "You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.\n",
    "\n",
    "### JSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "json_schema = json.dumps(\n",
    "    {\n",
    "        \"type\": \"object\",\n",
    "        \"properties\": {\n",
    "            \"name\": {\"type\": \"string\", \"pattern\": \"^[\\\\w]+$\"},\n",
    "            \"population\": {\"type\": \"integer\"},\n",
    "        },\n",
    "        \"required\": [\"name\", \"population\"],\n",
    "    }\n",
    ")\n",
    "\n",
    "response = client.chat.completions.create(\n",
    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n",
    "    ],\n",
    "    temperature=0,\n",
    "    max_tokens=128,\n",
    "    response_format={\n",
    "        \"type\": \"json_schema\",\n",
    "        \"json_schema\": {\"name\": \"foo\", \"schema\": json.loads(json_schema)},\n",
    "    },\n",
    ")\n",
    "\n",
    "print_highlight(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regular expression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response = client.chat.completions.create(\n",
    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n",
    "    ],\n",
    "    temperature=0,\n",
    "    max_tokens=128,\n",
    "    extra_body={\"regex\": \"(Paris|London)\"},\n",
    ")\n",
    "\n",
    "print_highlight(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -177,3 +177,50 @@ print(response.json())
 The `image_data` can be a file name, a URL, or a base64 encoded string. See also `python/sglang/srt/utils.py:load_image`.
 Streaming is supported in a similar manner as [above](#streaming).
 ### Structured decoding (JSON, Regex)
 You can specify a JSON schema or a regular expression to constrain the model output. The model output will be guaranteed to follow the given constraints.
 ```python
 import json
 import requests
 json_schema = json.dumps(
    {
        "type": "object",
        "properties": {
            "name": {"type": "string", "pattern": "^[\\w]+$"},
            "population": {"type": "integer"},
        },
        "required": ["name", "population"],
    }
 )
 # JSON
 response = requests.post(
    "http://localhost:30000/generate",
    json={
        "text": "Here is the information of the capital of France in the JSON format.\n",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 64,
            "json_schema": json_schema,
        },
    },
 )
 print(response.json())
 # Regular expression
 response = requests.post(
    "http://localhost:30000/generate",
    json={
        "text": "Paris is the capital of",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 64,
            "regex": "(France|England)",
        },
    },
 )
 print(response.json())
 ```