[Feature] Add /tokenize and /detokenize OpenAI compatible endpoints (#9545)

This commit is contained in:
Adarsh Shirawalmath
2025-10-08 10:08:48 +05:30
committed by GitHub
parent edd86b8853
commit 7c3f07dbcb
5 changed files with 434 additions and 1 deletions

View File

@@ -21,6 +21,8 @@
"- `/start_expert_distribution_record`\n",
"- `/stop_expert_distribution_record`\n",
"- `/dump_expert_distribution_record`\n",
"- `/tokenize`\n",
"- `/detokenize`\n",
"- A full list of these APIs can be found at [http_server.py](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/entrypoints/http_server.py)\n",
"\n",
"We mainly use `requests` to test these APIs in the following examples. You can also use `curl`.\n"
@@ -477,6 +479,104 @@
"source": [
"terminate_process(expert_record_server_process)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tokenize/Detokenize Example (Round Trip)\n",
"\n",
    "This example demonstrates how to use the `/tokenize` and `/detokenize` endpoints together. We first tokenize a string, then detokenize the resulting IDs to reconstruct the original text. This workflow is useful when you need to handle tokenization externally but still leverage the server for detokenization."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokenizer_free_server_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct\n",
"\"\"\"\n",
")\n",
"\n",
"wait_for_server(f\"http://localhost:{port}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from sglang.utils import print_highlight\n",
"\n",
"base_url = f\"http://localhost:{port}\"\n",
"tokenize_url = f\"{base_url}/tokenize\"\n",
"detokenize_url = f\"{base_url}/detokenize\"\n",
"\n",
"model_name = \"qwen/qwen2.5-0.5b-instruct\"\n",
"input_text = \"SGLang provides efficient tokenization endpoints.\"\n",
"print_highlight(f\"Original Input Text:\\n'{input_text}'\")\n",
"\n",
"# --- tokenize the input text ---\n",
"tokenize_payload = {\n",
" \"model\": model_name,\n",
" \"prompt\": input_text,\n",
" \"add_special_tokens\": False,\n",
"}\n",
"try:\n",
" tokenize_response = requests.post(tokenize_url, json=tokenize_payload)\n",
" tokenize_response.raise_for_status()\n",
" tokenization_result = tokenize_response.json()\n",
" token_ids = tokenization_result.get(\"tokens\")\n",
"\n",
" if not token_ids:\n",
" raise ValueError(\"Tokenization returned empty tokens.\")\n",
"\n",
" print_highlight(f\"\\nTokenized Output (IDs):\\n{token_ids}\")\n",
" print_highlight(f\"Token Count: {tokenization_result.get('count')}\")\n",
" print_highlight(f\"Max Model Length: {tokenization_result.get('max_model_len')}\")\n",
"\n",
" # --- detokenize the obtained token IDs ---\n",
" detokenize_payload = {\n",
" \"model\": model_name,\n",
" \"tokens\": token_ids,\n",
" \"skip_special_tokens\": True,\n",
" }\n",
"\n",
" detokenize_response = requests.post(detokenize_url, json=detokenize_payload)\n",
" detokenize_response.raise_for_status()\n",
" detokenization_result = detokenize_response.json()\n",
" reconstructed_text = detokenization_result.get(\"text\")\n",
"\n",
" print_highlight(f\"\\nDetokenized Output (Text):\\n'{reconstructed_text}'\")\n",
"\n",
" if input_text == reconstructed_text:\n",
" print_highlight(\n",
" \"\\nRound Trip Successful: Original and reconstructed text match.\"\n",
" )\n",
" else:\n",
" print_highlight(\n",
" \"\\nRound Trip Mismatch: Original and reconstructed text differ.\"\n",
" )\n",
"\n",
"except requests.exceptions.RequestException as e:\n",
" print_highlight(f\"\\nHTTP Request Error: {e}\")\n",
"except Exception as e:\n",
" print_highlight(f\"\\nAn error occurred: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"terminate_process(tokenizer_free_server_process)"
]
}
],
"metadata": {
@@ -493,5 +593,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}