Improve structured outputs: fix race condition, server crash, metrics and style (#6188)

This commit is contained in:
Lianmin Zheng
2025-05-11 08:36:16 -07:00
committed by GitHub
parent 94d42b6794
commit 01bdbf7f80
13 changed files with 568 additions and 258 deletions

View File

@@ -94,8 +94,8 @@
" model=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\",\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Please generate the information of the capital of France in the JSON format.\",\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
" },\n",
" ],\n",
" temperature=0,\n",
@@ -145,8 +145,8 @@
" model=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\",\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Give me the information of the capital of France in the JSON format.\",\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
" },\n",
" ],\n",
" temperature=0,\n",
@@ -188,8 +188,8 @@
" messages=[\n",
" {\"role\": \"system\", \"content\": \"You are a helpful geography bot.\"},\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Give me the information of the capital of France.\",\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
" },\n",
" ],\n",
" temperature=0,\n",
@@ -218,7 +218,7 @@
"response = client.chat.completions.create(\n",
" model=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n",
" {\"role\": \"assistant\", \"content\": \"What is the capital of France?\"},\n",
" ],\n",
" temperature=0,\n",
" max_tokens=2048,\n",
@@ -323,7 +323,7 @@
"You are a helpful assistant.\"\"\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"role\": \"assistant\",\n",
" \"content\": \"You are in New York. Please get the current date and time, and the weather.\",\n",
" },\n",
" ]\n",
@@ -400,9 +400,9 @@
"\n",
"messages = [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Here is the information of the capital of France in the JSON format.\\n\",\n",
" }\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Give me the information and population of the capital of France in the JSON format.\",\n",
" },\n",
"]\n",
"text = tokenizer.apply_chat_template(\n",
" messages, tokenize=False, add_generation_prompt=True\n",
@@ -452,7 +452,9 @@
")\n",
"\n",
"# JSON\n",
"text = tokenizer.apply_chat_template(text, tokenize=False, add_generation_prompt=True)\n",
"text = tokenizer.apply_chat_template(\n",
" messages, tokenize=False, add_generation_prompt=True\n",
")\n",
"response = requests.post(\n",
" f\"http://localhost:{port}/generate\",\n",
" json={\n",