Improve: Token-In Token-Out Usage for RLHF (#2843)
This commit is contained in:
@@ -348,6 +348,76 @@
|
||||
"source": [
|
||||
"terminate_process(reward_process)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Skip Tokenizer and Detokenizer\n",
|
||||
"\n",
|
||||
"SGLang Runtime also supports skip tokenizer and detokenizer. This is useful in cases like integrating with RLHF workflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tokenizer_free_server_process = execute_shell_command(\n",
|
||||
" \"\"\"\n",
|
||||
"python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --port=30010 --skip-tokenizer-init\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"wait_for_server(\"http://localhost:30010\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import AutoTokenizer\n",
|
||||
"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-3.2-1B-Instruct\")\n",
|
||||
"\n",
|
||||
"input_text = \"What is the capital of France?\"\n",
|
||||
"\n",
|
||||
"input_tokens = tokenizer.encode(input_text)\n",
|
||||
"print_highlight(f\"Input Text: {input_text}\")\n",
|
||||
"print_highlight(f\"Tokenized Input: {input_tokens}\")\n",
|
||||
"\n",
|
||||
"response = requests.post(\n",
|
||||
" \"http://localhost:30010/generate\",\n",
|
||||
" json={\n",
|
||||
" \"input_ids\": input_tokens,\n",
|
||||
" \"sampling_params\": {\n",
|
||||
" \"temperature\": 0,\n",
|
||||
" \"max_new_tokens\": 256,\n",
|
||||
" \"stop_token_ids\": [tokenizer.eos_token_id],\n",
|
||||
" },\n",
|
||||
" \"stream\": False,\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"output = response.json()\n",
|
||||
"output_tokens = output[\"token_ids\"]\n",
|
||||
"\n",
|
||||
"output_text = tokenizer.decode(output_tokens, skip_special_tokens=False)\n",
|
||||
"print_highlight(f\"Tokenized Output: {output_tokens}\")\n",
|
||||
"print_highlight(f\"Decoded Output: {output_text}\")\n",
|
||||
"print_highlight(f\"Output Text: {output['meta_info']['finish_reason']}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"terminate_process(tokenizer_free_server_process)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Structured Outputs (JSON, Regex, EBNF)"
|
||||
"# Structured Outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,6 +43,10 @@
|
||||
" print_highlight,\n",
|
||||
")\n",
|
||||
"import openai\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"server_process = execute_shell_command(\n",
|
||||
" \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --host 0.0.0.0 --grammar-backend xgrammar\"\n",
|
||||
|
||||
Reference in New Issue
Block a user