Expose max total num tokens from Runtime & Engine API (#2092)
This commit is contained in:
committed by
GitHub
parent
72f87b723b
commit
c35cd1f8c7
@@ -15,6 +15,7 @@
|
||||
"- `/health_generate`\n",
|
||||
"- `/flush_cache`\n",
|
||||
"- `/get_memory_pool_size`\n",
|
||||
"- `/get_max_total_num_tokens`\n",
|
||||
"- `/update_weights`\n",
|
||||
"- `/encode`(embedding model)\n",
|
||||
"- `/classify`(reward model)\n",
|
||||
@@ -201,6 +202,29 @@
|
||||
"print_highlight(response.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get Maximum Total Number of Tokens\n",
|
||||
"\n",
|
||||
"Exposes the maximum number of tokens SGLang can handle based on the current configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get_max_total_num_tokens\n",
|
||||
"\n",
|
||||
"url = \"http://localhost:30010/get_max_total_num_tokens\"\n",
|
||||
"\n",
|
||||
"response = requests.get(url)\n",
|
||||
"print_highlight(response.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
Reference in New Issue
Block a user