Remove token in token out in Native API (#5967)
This commit is contained in:
@@ -438,76 +438,6 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"terminate_process(expert_record_server_process)"
|
"terminate_process(expert_record_server_process)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Skip Tokenizer and Detokenizer\n",
|
|
||||||
"\n",
|
|
||||||
"SGLang Runtime also supports skipping the tokenizer and detokenizer. This is useful in cases such as integrating with an RLHF workflow."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tokenizer_free_server_process, port = launch_server_cmd(\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --skip-tokenizer-init\n",
|
|
||||||
"\"\"\"\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"wait_for_server(f\"http://localhost:{port}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from transformers import AutoTokenizer\n",
|
|
||||||
"\n",
|
|
||||||
"tokenizer = AutoTokenizer.from_pretrained(\"qwen/qwen2.5-0.5b-instruct\")\n",
|
|
||||||
"\n",
|
|
||||||
"input_text = \"What is the capital of France?\"\n",
|
|
||||||
"\n",
|
|
||||||
"input_tokens = tokenizer.encode(input_text)\n",
|
|
||||||
"print_highlight(f\"Input Text: {input_text}\")\n",
|
|
||||||
"print_highlight(f\"Tokenized Input: {input_tokens}\")\n",
|
|
||||||
"\n",
|
|
||||||
"response = requests.post(\n",
|
|
||||||
" f\"http://localhost:{port}/generate\",\n",
|
|
||||||
" json={\n",
|
|
||||||
" \"input_ids\": input_tokens,\n",
|
|
||||||
" \"sampling_params\": {\n",
|
|
||||||
" \"temperature\": 0,\n",
|
|
||||||
" \"max_new_tokens\": 256,\n",
|
|
||||||
" \"stop_token_ids\": [tokenizer.eos_token_id],\n",
|
|
||||||
" },\n",
|
|
||||||
" \"stream\": False,\n",
|
|
||||||
" },\n",
|
|
||||||
")\n",
|
|
||||||
"output = response.json()\n",
|
|
||||||
"output_tokens = output[\"output_ids\"]\n",
|
|
||||||
"\n",
|
|
||||||
"output_text = tokenizer.decode(output_tokens, skip_special_tokens=False)\n",
|
|
||||||
"print_highlight(f\"Tokenized Output: {output_tokens}\")\n",
|
|
||||||
"print_highlight(f\"Decoded Output: {output_text}\")\n",
|
|
||||||
"print_highlight(f\"Output Text: {output['meta_info']['finish_reason']}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"terminate_process(tokenizer_free_server_process)"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
Reference in New Issue
Block a user