sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct
This commit is contained in:
241
scripts/playground/frontend_reasoning.ipynb
Normal file
241
scripts/playground/frontend_reasoning.ipynb
Normal file
@@ -0,0 +1,241 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Launch A Server\n",
|
||||
"\n",
|
||||
"Launch the server with a reasoning model (Qwen 3.5-4B) and reasoning parser."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sglang import separate_reasoning, assistant_begin, assistant_end\n",
|
||||
"from sglang import assistant, function, gen, system, user\n",
|
||||
"from sglang import image\n",
|
||||
"from sglang import RuntimeEndpoint, set_default_backend\n",
|
||||
"from sglang.srt.utils import load_image\n",
|
||||
"from sglang.test.test_utils import is_in_ci\n",
|
||||
"from sglang.utils import print_highlight, terminate_process, wait_for_server\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"if is_in_ci():\n",
|
||||
" from patch import launch_server_cmd\n",
|
||||
"else:\n",
|
||||
" from sglang.utils import launch_server_cmd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"server_process, port = launch_server_cmd(\n",
|
||||
" \"python3 -m sglang.launch_server --model-path Qwen/Qwen3-4B --reasoning-parser qwen3 --host 0.0.0.0\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"wait_for_server(f\"http://localhost:{port}\")\n",
|
||||
"print(f\"Server started on http://localhost:{port}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set the default backend. Note: you can set chat_template_name in RontimeEndpoint. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"set_default_backend(\n",
|
||||
" RuntimeEndpoint(f\"http://localhost:{port}\", chat_template_name=\"qwen\")\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's start with a basic question-answering task. And see how the reasoning content is generated."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@function\n",
|
||||
"def basic_qa(s, question):\n",
|
||||
" s += system(f\"You are a helpful assistant than can answer questions.\")\n",
|
||||
" s += user(question)\n",
|
||||
" s += assistant_begin()\n",
|
||||
" s += gen(\"answer\", max_tokens=512)\n",
|
||||
" s += assistant_end()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"state = basic_qa(\"List 3 countries and their capitals.\")\n",
|
||||
"print_highlight(state[\"answer\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With `separate_reasoning`, you can move the reasoning content to `{param_name}_reasoning_content` in the state."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@function\n",
|
||||
"def basic_qa_separate_reasoning(s, question):\n",
|
||||
" s += system(f\"You are a helpful assistant than can answer questions.\")\n",
|
||||
" s += user(question)\n",
|
||||
" s += assistant_begin()\n",
|
||||
" s += separate_reasoning(gen(\"answer\", max_tokens=512), model_type=\"qwen3\")\n",
|
||||
" s += assistant_end()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"reasoning_state = basic_qa_separate_reasoning(\"List 3 countries and their capitals.\")\n",
|
||||
"print_highlight(reasoning_state.stream_executor.variable_event.keys())\n",
|
||||
"print_highlight(\n",
|
||||
" f\"\\nSeparated Reasoning Content:\\n{reasoning_state['answer_reasoning_content']}\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print_highlight(f\"\\n\\nContent:\\n{reasoning_state['answer']}\")\n",
|
||||
"print_highlight(f\"\\n\\nMessages:\\n{reasoning_state.messages()[-1]}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`separate_reasoning` can also be used in multi-turn conversations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@function\n",
|
||||
"def multi_turn_qa(s):\n",
|
||||
" s += system(f\"You are a helpful assistant than can answer questions.\")\n",
|
||||
" s += user(\"Please give me a list of 3 countries and their capitals.\")\n",
|
||||
" s += assistant(\n",
|
||||
" separate_reasoning(gen(\"first_answer\", max_tokens=512), model_type=\"qwen3\")\n",
|
||||
" )\n",
|
||||
" s += user(\"Please give me another list of 3 countries and their capitals.\")\n",
|
||||
" s += assistant(\n",
|
||||
" separate_reasoning(gen(\"second_answer\", max_tokens=512), model_type=\"qwen3\")\n",
|
||||
" )\n",
|
||||
" return s\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"reasoning_state = multi_turn_qa()\n",
|
||||
"print_highlight(f\"\\n\\nfirst_answer:\\n{reasoning_state['first_answer']}\")\n",
|
||||
"print_highlight(\n",
|
||||
" f\"\\n\\nfirst_answer_reasoning_content:\\n{reasoning_state['first_answer_reasoning_content']}\"\n",
|
||||
")\n",
|
||||
"print_highlight(f\"\\n\\nsecond_answer:\\n{reasoning_state['second_answer']}\")\n",
|
||||
"print_highlight(\n",
|
||||
" f\"\\n\\nsecond_answer_reasoning_content:\\n{reasoning_state['second_answer_reasoning_content']}\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using No thinking as Qwen 3's advanced feature \n",
|
||||
"\n",
|
||||
"sglang separate_reasoning is particularly useful when combined with Qwen 3's advanced feature.\n",
|
||||
"\n",
|
||||
"[Qwen 3's advanced usages](https://qwenlm.github.io/blog/qwen3/#advanced-usages)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"reasoning_state = basic_qa_separate_reasoning(\n",
|
||||
" \"List 3 countries and their capitals. /no_think\"\n",
|
||||
")\n",
|
||||
"print_highlight(f\"Reasoning Content:\\n{reasoning_state['answer_reasoning_content']}\")\n",
|
||||
"print_highlight(f\"Content:\\n{reasoning_state['answer']}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`separate_reasoning` can also be used in regular expression generation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@function\n",
|
||||
"def regular_expression_gen(s):\n",
|
||||
" s += user(\n",
|
||||
" \"What is the IP address of the Google DNS servers? just provide the answer\"\n",
|
||||
" )\n",
|
||||
" s += assistant(\n",
|
||||
" separate_reasoning(\n",
|
||||
" gen(\n",
|
||||
" \"answer\",\n",
|
||||
" temperature=0,\n",
|
||||
" regex=r\"((25[0-5]|2[0-4]\\d|[01]?\\d\\d?).){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\",\n",
|
||||
" max_tokens=512,\n",
|
||||
" ),\n",
|
||||
" model_type=\"qwen3\",\n",
|
||||
" ),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"reasoning_state = regular_expression_gen()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print_highlight(f\"Answer:\\n{reasoning_state['answer']}\")\n",
|
||||
"print_highlight(\n",
|
||||
" f\"\\n\\nReasoning Content:\\n{reasoning_state['answer_reasoning_content']}\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user