Improve structured outputs: fix race condition, server crash, metrics and style (#6188)

2025-05-11 08:36:16 -07:00
parent 94d42b6794
commit 01bdbf7f80
13 changed files with 568 additions and 258 deletions
--- a/docs/backend/structured_outputs_for_reasoning_models.ipynb
+++ b/docs/backend/structured_outputs_for_reasoning_models.ipynb
@@ -94,8 +94,8 @@
    "    model=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\",\n",
    "    messages=[\n",
    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": \"Please generate the information of the capital of France in the JSON format.\",\n",
+    "            \"role\": \"assistant\",\n",
+    "            \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
    "        },\n",
    "    ],\n",
    "    temperature=0,\n",
@@ -145,8 +145,8 @@
    "    model=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\",\n",
    "    messages=[\n",
    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": \"Give me the information of the capital of France in the JSON format.\",\n",
+    "            \"role\": \"assistant\",\n",
+    "            \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
    "        },\n",
    "    ],\n",
    "    temperature=0,\n",
@@ -188,8 +188,8 @@
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": \"You are a helpful geography bot.\"},\n",
    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": \"Give me the information of the capital of France.\",\n",
+    "            \"role\": \"assistant\",\n",
+    "            \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
    "        },\n",
    "    ],\n",
    "    temperature=0,\n",
@@ -218,7 +218,7 @@
    "response = client.chat.completions.create(\n",
    "    model=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\",\n",
    "    messages=[\n",
-    "        {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n",
+    "        {\"role\": \"assistant\", \"content\": \"What is the capital of France?\"},\n",
    "    ],\n",
    "    temperature=0,\n",
    "    max_tokens=2048,\n",
@@ -323,7 +323,7 @@
    "You are a helpful assistant.\"\"\",\n",
    "        },\n",
    "        {\n",
-    "            \"role\": \"user\",\n",
+    "            \"role\": \"assistant\",\n",
    "            \"content\": \"You are in New York. Please get the current date and time, and the weather.\",\n",
    "        },\n",
    "    ]\n",
@@ -400,9 +400,9 @@
    "\n",
    "messages = [\n",
    "    {\n",
-    "        \"role\": \"user\",\n",
-    "        \"content\": \"Here is the information of the capital of France in the JSON format.\\n\",\n",
-    "    }\n",
+    "        \"role\": \"assistant\",\n",
+    "        \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
+    "    },\n",
    "]\n",
    "text = tokenizer.apply_chat_template(\n",
    "    messages, tokenize=False, add_generation_prompt=True\n",
@@ -452,7 +452,9 @@
    ")\n",
    "\n",
    "# JSON\n",
-    "text = tokenizer.apply_chat_template(text, tokenize=False, add_generation_prompt=True)\n",
+    "text = tokenizer.apply_chat_template(\n",
+    "    messages, tokenize=False, add_generation_prompt=True\n",
+    ")\n",
    "response = requests.post(\n",
    "    f\"http://localhost:{port}/generate\",\n",
    "    json={\n",