diff --git a/docs/backend/native_api.ipynb b/docs/backend/native_api.ipynb index b7d90a477..04b8ec0ed 100644 --- a/docs/backend/native_api.ipynb +++ b/docs/backend/native_api.ipynb @@ -438,76 +438,6 @@ "source": [ "terminate_process(expert_record_server_process)" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Skip Tokenizer and Detokenizer\n", - "\n", - "SGLang Runtime also supports skip tokenizer and detokenizer. This is useful in cases like integrating with RLHF workflow." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tokenizer_free_server_process, port = launch_server_cmd(\n", - " \"\"\"\n", - "python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --skip-tokenizer-init\n", - "\"\"\"\n", - ")\n", - "\n", - "wait_for_server(f\"http://localhost:{port}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import AutoTokenizer\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(\"qwen/qwen2.5-0.5b-instruct\")\n", - "\n", - "input_text = \"What is the capital of France?\"\n", - "\n", - "input_tokens = tokenizer.encode(input_text)\n", - "print_highlight(f\"Input Text: {input_text}\")\n", - "print_highlight(f\"Tokenized Input: {input_tokens}\")\n", - "\n", - "response = requests.post(\n", - " f\"http://localhost:{port}/generate\",\n", - " json={\n", - " \"input_ids\": input_tokens,\n", - " \"sampling_params\": {\n", - " \"temperature\": 0,\n", - " \"max_new_tokens\": 256,\n", - " \"stop_token_ids\": [tokenizer.eos_token_id],\n", - " },\n", - " \"stream\": False,\n", - " },\n", - ")\n", - "output = response.json()\n", - "output_tokens = output[\"output_ids\"]\n", - "\n", - "output_text = tokenizer.decode(output_tokens, skip_special_tokens=False)\n", - "print_highlight(f\"Tokenized Output: {output_tokens}\")\n", - "print_highlight(f\"Decoded Output: {output_text}\")\n", - "print_highlight(f\"Output Text: {output['meta_info']['finish_reason']}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "terminate_process(tokenizer_free_server_process)" - ] } ], "metadata": {