Remove token in token out in Native API (#5967)

2025-05-01 21:59:43 -07:00
parent 170d1f218a
commit 73dcf2b326
1 changed files with 0 additions and 70 deletions
--- a/docs/backend/native_api.ipynb
+++ b/docs/backend/native_api.ipynb
@@ -438,76 +438,6 @@
   "source": [
    "terminate_process(expert_record_server_process)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Skip Tokenizer and Detokenizer\n",
    "\n",
    "SGLang Runtime also supports skip tokenizer and detokenizer. This is useful in cases like integrating with RLHF workflow."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer_free_server_process, port = launch_server_cmd(\n",
    "    \"\"\"\n",
    "python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --skip-tokenizer-init\n",
    "\"\"\"\n",
    ")\n",
    "\n",
    "wait_for_server(f\"http://localhost:{port}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"qwen/qwen2.5-0.5b-instruct\")\n",
    "\n",
    "input_text = \"What is the capital of France?\"\n",
    "\n",
    "input_tokens = tokenizer.encode(input_text)\n",
    "print_highlight(f\"Input Text: {input_text}\")\n",
    "print_highlight(f\"Tokenized Input: {input_tokens}\")\n",
    "\n",
    "response = requests.post(\n",
    "    f\"http://localhost:{port}/generate\",\n",
    "    json={\n",
    "        \"input_ids\": input_tokens,\n",
    "        \"sampling_params\": {\n",
    "            \"temperature\": 0,\n",
    "            \"max_new_tokens\": 256,\n",
    "            \"stop_token_ids\": [tokenizer.eos_token_id],\n",
    "        },\n",
    "        \"stream\": False,\n",
    "    },\n",
    ")\n",
    "output = response.json()\n",
    "output_tokens = output[\"output_ids\"]\n",
    "\n",
    "output_text = tokenizer.decode(output_tokens, skip_special_tokens=False)\n",
    "print_highlight(f\"Tokenized Output: {output_tokens}\")\n",
    "print_highlight(f\"Decoded Output: {output_text}\")\n",
    "print_highlight(f\"Output Text: {output['meta_info']['finish_reason']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "terminate_process(tokenizer_free_server_process)"
   ]
  }
 ],
 "metadata": {