[Feature] Add /tokenize and /detokenize OpenAI compatible endpoints (#9545)
This commit is contained in:
committed by
GitHub
parent
edd86b8853
commit
7c3f07dbcb
@@ -21,6 +21,8 @@
|
||||
"- `/start_expert_distribution_record`\n",
|
||||
"- `/stop_expert_distribution_record`\n",
|
||||
"- `/dump_expert_distribution_record`\n",
|
||||
"- `/tokenize`\n",
|
||||
"- `/detokenize`\n",
|
||||
"- A full list of these APIs can be found at [http_server.py](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/entrypoints/http_server.py)\n",
|
||||
"\n",
|
||||
"We mainly use `requests` to test these APIs in the following examples. You can also use `curl`.\n"
|
||||
@@ -477,6 +479,104 @@
|
||||
"source": [
|
||||
"terminate_process(expert_record_server_process)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tokenize/Detokenize Example (Round Trip)\n",
|
||||
"\n",
|
||||
"This example demonstrates how to use the `/tokenize` and `/detokenize` endpoints together. We first tokenize a string, then detokenize the resulting IDs to reconstruct the original text. This workflow is useful when you need to handle tokenization externally but still leverage the server for detokenization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tokenizer_free_server_process, port = launch_server_cmd(\n",
|
||||
" \"\"\"\n",
|
||||
"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"wait_for_server(f\"http://localhost:{port}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from sglang.utils import print_highlight\n",
|
||||
"\n",
|
||||
"base_url = f\"http://localhost:{port}\"\n",
|
||||
"tokenize_url = f\"{base_url}/tokenize\"\n",
|
||||
"detokenize_url = f\"{base_url}/detokenize\"\n",
|
||||
"\n",
|
||||
"model_name = \"qwen/qwen2.5-0.5b-instruct\"\n",
|
||||
"input_text = \"SGLang provides efficient tokenization endpoints.\"\n",
|
||||
"print_highlight(f\"Original Input Text:\\n'{input_text}'\")\n",
|
||||
"\n",
|
||||
"# --- tokenize the input text ---\n",
|
||||
"tokenize_payload = {\n",
|
||||
" \"model\": model_name,\n",
|
||||
" \"prompt\": input_text,\n",
|
||||
" \"add_special_tokens\": False,\n",
|
||||
"}\n",
|
||||
"try:\n",
|
||||
" tokenize_response = requests.post(tokenize_url, json=tokenize_payload)\n",
|
||||
" tokenize_response.raise_for_status()\n",
|
||||
" tokenization_result = tokenize_response.json()\n",
|
||||
" token_ids = tokenization_result.get(\"tokens\")\n",
|
||||
"\n",
|
||||
" if not token_ids:\n",
|
||||
" raise ValueError(\"Tokenization returned empty tokens.\")\n",
|
||||
"\n",
|
||||
" print_highlight(f\"\\nTokenized Output (IDs):\\n{token_ids}\")\n",
|
||||
" print_highlight(f\"Token Count: {tokenization_result.get('count')}\")\n",
|
||||
" print_highlight(f\"Max Model Length: {tokenization_result.get('max_model_len')}\")\n",
|
||||
"\n",
|
||||
" # --- detokenize the obtained token IDs ---\n",
|
||||
" detokenize_payload = {\n",
|
||||
" \"model\": model_name,\n",
|
||||
" \"tokens\": token_ids,\n",
|
||||
" \"skip_special_tokens\": True,\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" detokenize_response = requests.post(detokenize_url, json=detokenize_payload)\n",
|
||||
" detokenize_response.raise_for_status()\n",
|
||||
" detokenization_result = detokenize_response.json()\n",
|
||||
" reconstructed_text = detokenization_result.get(\"text\")\n",
|
||||
"\n",
|
||||
" print_highlight(f\"\\nDetokenized Output (Text):\\n'{reconstructed_text}'\")\n",
|
||||
"\n",
|
||||
" if input_text == reconstructed_text:\n",
|
||||
" print_highlight(\n",
|
||||
" \"\\nRound Trip Successful: Original and reconstructed text match.\"\n",
|
||||
" )\n",
|
||||
" else:\n",
|
||||
" print_highlight(\n",
|
||||
" \"\\nRound Trip Mismatch: Original and reconstructed text differ.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"except requests.exceptions.RequestException as e:\n",
|
||||
" print_highlight(f\"\\nHTTP Request Error: {e}\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print_highlight(f\"\\nAn error occurred: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"terminate_process(tokenizer_free_server_process)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -493,5 +593,5 @@
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user