From 02755768d32765eb49f9fa1499ed841c3aab7edb Mon Sep 17 00:00:00 2001 From: Chayenne Date: Mon, 4 Nov 2024 23:53:44 -0800 Subject: [PATCH] Change judge to classify & Modify make file (#1920) --- docs/Makefile | 2 +- docs/backend/native_api.ipynb | 143 +++++++++++++++++++--- docs/backend/offline_engine_api.ipynb | 56 +++++++-- docs/backend/openai_api_completions.ipynb | 109 ++++++++++------- docs/backend/openai_api_embeddings.ipynb | 49 ++++---- docs/backend/openai_api_vision.ipynb | 56 +++++++-- docs/start/send_request.ipynb | 70 +++++++---- examples/runtime/reward_model.py | 2 +- python/sglang/srt/server.py | 6 +- 9 files changed, 369 insertions(+), 124 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 51446dc38..50f77a30c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -14,7 +14,7 @@ help: # New target to compile Markdown and Jupyter Notebook files compile: - find $(SOURCEDIR) -name '*.ipynb' | while read nb; do \ + find $(SOURCEDIR) -path "*/_build/*" -prune -o -name "*.ipynb" -print | while read nb; do \ if [ -f "$$nb" ]; then \ echo "Executing $$nb"; \ jupyter nbconvert --to notebook --execute --inplace "$$nb" \ diff --git a/docs/backend/native_api.ipynb b/docs/backend/native_api.ipynb index 95af6344a..f84e02b59 100644 --- a/docs/backend/native_api.ipynb +++ b/docs/backend/native_api.ipynb @@ -17,7 +17,7 @@ "- `/get_memory_pool_size`\n", "- `/update_weights`\n", "- `/encode`(embedding model)\n", - "- `/judge`(reward model)\n", + "- `/classify`(reward model)\n", "\n", "We mainly use `requests` to test these APIs in the following examples. You can also use `curl`." ] @@ -32,7 +32,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:08.536886Z", + "iopub.status.busy": "2024-11-05T05:08:08.536763Z", + "iopub.status.idle": "2024-11-05T05:08:34.725831Z", + "shell.execute_reply": "2024-11-05T05:08:34.725316Z" + } + }, "outputs": [], "source": [ "from sglang.utils import (\n", @@ -64,7 +71,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:34.727530Z", + "iopub.status.busy": "2024-11-05T05:08:34.727333Z", + "iopub.status.idle": "2024-11-05T05:08:35.359784Z", + "shell.execute_reply": "2024-11-05T05:08:35.359090Z" + } + }, "outputs": [], "source": [ "url = \"http://localhost:30010/generate\"\n", @@ -85,7 +99,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.362286Z", + "iopub.status.busy": "2024-11-05T05:08:35.362140Z", + "iopub.status.idle": "2024-11-05T05:08:35.368711Z", + "shell.execute_reply": "2024-11-05T05:08:35.368220Z" + } + }, "outputs": [], "source": [ "url = \"http://localhost:30010/get_server_args\"\n", @@ -109,7 +130,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.371313Z", + "iopub.status.busy": "2024-11-05T05:08:35.370877Z", + "iopub.status.idle": "2024-11-05T05:08:35.376712Z", + "shell.execute_reply": "2024-11-05T05:08:35.376230Z" + } + }, "outputs": [], "source": [ "url = \"http://localhost:30010/get_model_info\"\n", @@ -134,7 +162,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.378982Z", + "iopub.status.busy": "2024-11-05T05:08:35.378597Z", + "iopub.status.idle": "2024-11-05T05:08:35.391820Z", + "shell.execute_reply": "2024-11-05T05:08:35.391336Z" + } + }, "outputs": [], "source": [ "url = \"http://localhost:30010/health_generate\"\n", @@ -146,7 +181,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.393748Z", + "iopub.status.busy": "2024-11-05T05:08:35.393606Z", + "iopub.status.idle": "2024-11-05T05:08:35.398645Z", + "shell.execute_reply": "2024-11-05T05:08:35.398145Z" + } + }, "outputs": [], "source": [ "url = \"http://localhost:30010/health\"\n", @@ -167,7 +209,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.400683Z", + "iopub.status.busy": "2024-11-05T05:08:35.400419Z", + "iopub.status.idle": "2024-11-05T05:08:35.406146Z", + "shell.execute_reply": "2024-11-05T05:08:35.405661Z" + } + }, "outputs": [], "source": [ "# flush cache\n", @@ -190,7 +239,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.408176Z", + "iopub.status.busy": "2024-11-05T05:08:35.407884Z", + "iopub.status.idle": "2024-11-05T05:08:35.413587Z", + "shell.execute_reply": "2024-11-05T05:08:35.413108Z" + } + }, "outputs": [], "source": [ "# get_memory_pool_size\n", @@ -213,7 +269,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:35.416090Z", + "iopub.status.busy": "2024-11-05T05:08:35.415793Z", + "iopub.status.idle": "2024-11-05T05:08:36.552549Z", + "shell.execute_reply": "2024-11-05T05:08:36.551870Z" + } + }, "outputs": [], "source": [ "# successful update with same architecture and size\n", @@ -231,7 +294,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:36.554823Z", + "iopub.status.busy": "2024-11-05T05:08:36.554680Z", + "iopub.status.idle": "2024-11-05T05:08:38.053945Z", + "shell.execute_reply": "2024-11-05T05:08:38.053034Z" + } + }, "outputs": [], "source": [ "# failed update with different parameter size\n", @@ -263,7 +333,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:08:38.056783Z", + "iopub.status.busy": "2024-11-05T05:08:38.056497Z", + "iopub.status.idle": "2024-11-05T05:09:04.436030Z", + "shell.execute_reply": "2024-11-05T05:09:04.435311Z" + } + }, "outputs": [], "source": [ "terminate_process(server_process)\n", @@ -281,7 +358,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:09:04.438987Z", + "iopub.status.busy": "2024-11-05T05:09:04.438568Z", + "iopub.status.idle": "2024-11-05T05:09:04.485291Z", + "shell.execute_reply": "2024-11-05T05:09:04.484829Z" + } + }, "outputs": [], "source": [ "# successful encode for embedding model\n", @@ -298,15 +382,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Judge (reward model)\n", + "## Classify (reward model)\n", "\n", - "SGLang Runtime also supports reward models. Here we use a reward model to judge the quality of pairwise generations." + "SGLang Runtime also supports reward models. Here we use a reward model to classify the quality of pairwise generations." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:09:04.487191Z", + "iopub.status.busy": "2024-11-05T05:09:04.486929Z", + "iopub.status.idle": "2024-11-05T05:09:25.553481Z", + "shell.execute_reply": "2024-11-05T05:09:25.552747Z" + } + }, "outputs": [], "source": [ "terminate_process(embedding_process)\n", @@ -326,7 +417,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:09:25.555813Z", + "iopub.status.busy": "2024-11-05T05:09:25.555666Z", + "iopub.status.idle": "2024-11-05T05:09:26.354372Z", + "shell.execute_reply": "2024-11-05T05:09:26.353693Z" + } + }, "outputs": [], "source": [ "from transformers import AutoTokenizer\n", @@ -346,7 +444,7 @@ "tokenizer = AutoTokenizer.from_pretrained(\"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\")\n", "prompts = tokenizer.apply_chat_template(CONVS, tokenize=False)\n", "\n", - "url = \"http://localhost:30030/judge\"\n", + "url = \"http://localhost:30030/classify\"\n", "data = {\n", " \"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \n", " \"text\": prompts\n", @@ -360,7 +458,14 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:09:26.356532Z", + "iopub.status.busy": "2024-11-05T05:09:26.356327Z", + "iopub.status.idle": "2024-11-05T05:09:26.396590Z", + "shell.execute_reply": "2024-11-05T05:09:26.395914Z" + } + }, "outputs": [], "source": [ "terminate_process(reward_process)" diff --git a/docs/backend/offline_engine_api.ipynb b/docs/backend/offline_engine_api.ipynb index 63a175ffa..4ca10a718 100644 --- a/docs/backend/offline_engine_api.ipynb +++ b/docs/backend/offline_engine_api.ipynb @@ -33,7 +33,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:21:27.503026Z", + "iopub.status.busy": "2024-11-05T05:21:27.502741Z", + "iopub.status.idle": "2024-11-05T05:21:49.554631Z", + "shell.execute_reply": "2024-11-05T05:21:49.553690Z" + } + }, "outputs": [], "source": [ "# launch the offline engine\n", @@ -55,7 +62,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:21:49.558275Z", + "iopub.status.busy": "2024-11-05T05:21:49.558110Z", + "iopub.status.idle": "2024-11-05T05:21:52.717287Z", + "shell.execute_reply": "2024-11-05T05:21:52.716842Z" + } + }, "outputs": [], "source": [ "prompts = [\n", @@ -83,7 +97,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:21:52.721738Z", + "iopub.status.busy": "2024-11-05T05:21:52.720908Z", + "iopub.status.idle": "2024-11-05T05:22:01.770341Z", + "shell.execute_reply": "2024-11-05T05:22:01.769510Z" + } + }, "outputs": [], "source": [ "prompts = [\n", @@ -114,7 +135,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:22:01.772662Z", + "iopub.status.busy": "2024-11-05T05:22:01.772377Z", + "iopub.status.idle": "2024-11-05T05:22:04.897499Z", + "shell.execute_reply": "2024-11-05T05:22:04.896867Z" + } + }, "outputs": [], "source": [ "prompts = [\n", @@ -149,7 +177,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:22:04.899754Z", + "iopub.status.busy": "2024-11-05T05:22:04.899478Z", + "iopub.status.idle": "2024-11-05T05:22:13.970245Z", + "shell.execute_reply": "2024-11-05T05:22:13.969779Z" + } + }, "outputs": [], "source": [ "prompts = [\n", @@ -178,8 +213,15 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:22:13.972039Z", + "iopub.status.busy": "2024-11-05T05:22:13.971846Z", + "iopub.status.idle": "2024-11-05T05:22:14.027421Z", + "shell.execute_reply": "2024-11-05T05:22:14.027003Z" + } + }, "outputs": [], "source": [ "llm.shutdown()" diff --git a/docs/backend/openai_api_completions.ipynb b/docs/backend/openai_api_completions.ipynb index e39b328f1..1dfa53129 100644 --- a/docs/backend/openai_api_completions.ipynb +++ b/docs/backend/openai_api_completions.ipynb @@ -37,7 +37,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:09:30.637832Z", + "iopub.status.busy": "2024-11-05T05:09:30.637709Z", + "iopub.status.idle": "2024-11-05T05:09:58.830158Z", + "shell.execute_reply": "2024-11-05T05:09:58.829395Z" + } + }, "outputs": [], "source": [ "from sglang.utils import (\n", @@ -72,10 +79,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:16.624550Z", - "iopub.status.busy": "2024-11-01T02:45:16.624258Z", - "iopub.status.idle": "2024-11-01T02:45:18.087455Z", - "shell.execute_reply": "2024-11-01T02:45:18.086450Z" + "iopub.execute_input": "2024-11-05T05:09:58.833008Z", + "iopub.status.busy": "2024-11-05T05:09:58.832805Z", + "iopub.status.idle": "2024-11-05T05:10:00.187146Z", + "shell.execute_reply": "2024-11-05T05:10:00.186657Z" } }, "outputs": [], @@ -112,10 +119,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:18.090228Z", - "iopub.status.busy": "2024-11-01T02:45:18.090071Z", - "iopub.status.idle": "2024-11-01T02:45:21.193221Z", - "shell.execute_reply": "2024-11-01T02:45:21.192539Z" + "iopub.execute_input": "2024-11-05T05:10:00.189444Z", + "iopub.status.busy": "2024-11-05T05:10:00.189289Z", + "iopub.status.idle": "2024-11-05T05:10:03.291891Z", + "shell.execute_reply": "2024-11-05T05:10:03.291173Z" } }, "outputs": [], @@ -158,10 +165,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:21.195226Z", - "iopub.status.busy": "2024-11-01T02:45:21.194680Z", - "iopub.status.idle": "2024-11-01T02:45:21.675473Z", - "shell.execute_reply": "2024-11-01T02:45:21.675050Z" + "iopub.execute_input": "2024-11-05T05:10:03.294389Z", + "iopub.status.busy": "2024-11-05T05:10:03.294237Z", + "iopub.status.idle": "2024-11-05T05:10:03.469357Z", + "shell.execute_reply": "2024-11-05T05:10:03.468661Z" } }, "outputs": [], @@ -191,10 +198,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:21.676813Z", - "iopub.status.busy": "2024-11-01T02:45:21.676665Z", - "iopub.status.idle": "2024-11-01T02:45:23.182104Z", - "shell.execute_reply": "2024-11-01T02:45:23.181695Z" + "iopub.execute_input": "2024-11-05T05:10:03.471573Z", + "iopub.status.busy": "2024-11-05T05:10:03.471430Z", + "iopub.status.idle": "2024-11-05T05:10:04.977081Z", + "shell.execute_reply": "2024-11-05T05:10:04.976391Z" } }, "outputs": [], @@ -227,10 +234,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:23.186337Z", - "iopub.status.busy": "2024-11-01T02:45:23.186189Z", - "iopub.status.idle": "2024-11-01T02:45:26.769744Z", - "shell.execute_reply": "2024-11-01T02:45:26.769299Z" + "iopub.execute_input": "2024-11-05T05:10:04.979428Z", + "iopub.status.busy": "2024-11-05T05:10:04.979272Z", + "iopub.status.idle": "2024-11-05T05:10:08.568761Z", + "shell.execute_reply": "2024-11-05T05:10:08.568355Z" } }, "outputs": [], @@ -264,7 +271,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:10:08.571102Z", + "iopub.status.busy": "2024-11-05T05:10:08.570964Z", + "iopub.status.idle": "2024-11-05T05:10:23.214087Z", + "shell.execute_reply": "2024-11-05T05:10:23.213664Z" + } + }, "outputs": [], "source": [ "import json\n", @@ -306,7 +320,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:10:23.216229Z", + "iopub.status.busy": "2024-11-05T05:10:23.216076Z", + "iopub.status.idle": "2024-11-05T05:10:23.884236Z", + "shell.execute_reply": "2024-11-05T05:10:23.883897Z" + } + }, "outputs": [], "source": [ "response = client.chat.completions.create(\n", @@ -344,10 +365,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:26.772016Z", - "iopub.status.busy": "2024-11-01T02:45:26.771868Z", - "iopub.status.idle": "2024-11-01T02:45:26.794225Z", - "shell.execute_reply": "2024-11-01T02:45:26.793811Z" + "iopub.execute_input": "2024-11-05T05:10:23.886276Z", + "iopub.status.busy": "2024-11-05T05:10:23.886136Z", + "iopub.status.idle": "2024-11-05T05:10:23.905880Z", + "shell.execute_reply": "2024-11-05T05:10:23.905529Z" } }, "outputs": [], @@ -406,10 +427,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:26.796422Z", - "iopub.status.busy": "2024-11-01T02:45:26.796273Z", - "iopub.status.idle": "2024-11-01T02:45:29.810471Z", - "shell.execute_reply": "2024-11-01T02:45:29.810041Z" + "iopub.execute_input": "2024-11-05T05:10:23.907468Z", + "iopub.status.busy": "2024-11-05T05:10:23.907247Z", + "iopub.status.idle": "2024-11-05T05:10:26.920212Z", + "shell.execute_reply": "2024-11-05T05:10:26.919865Z" } }, "outputs": [], @@ -461,10 +482,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:29.812339Z", - "iopub.status.busy": "2024-11-01T02:45:29.812198Z", - "iopub.status.idle": "2024-11-01T02:45:54.851243Z", - "shell.execute_reply": "2024-11-01T02:45:54.850668Z" + "iopub.execute_input": "2024-11-05T05:10:26.922675Z", + "iopub.status.busy": "2024-11-05T05:10:26.922413Z", + "iopub.status.idle": "2024-11-05T05:10:51.961703Z", + "shell.execute_reply": "2024-11-05T05:10:51.960846Z" } }, "outputs": [], @@ -544,10 +565,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:45:54.854018Z", - "iopub.status.busy": "2024-11-01T02:45:54.853851Z", - "iopub.status.idle": "2024-11-01T02:46:07.893199Z", - "shell.execute_reply": "2024-11-01T02:46:07.892310Z" + "iopub.execute_input": "2024-11-05T05:10:51.964749Z", + "iopub.status.busy": "2024-11-05T05:10:51.964215Z", + "iopub.status.idle": "2024-11-05T05:11:05.023450Z", + "shell.execute_reply": "2024-11-05T05:11:05.023101Z" } }, "outputs": [], @@ -636,13 +657,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:46:07.896114Z", - "iopub.status.busy": "2024-11-01T02:46:07.895820Z", - "iopub.status.idle": "2024-11-01T02:46:09.365287Z", - "shell.execute_reply": "2024-11-01T02:46:09.364705Z" + "iopub.execute_input": "2024-11-05T05:11:05.024877Z", + "iopub.status.busy": "2024-11-05T05:11:05.024561Z", + "iopub.status.idle": "2024-11-05T05:11:06.358695Z", + "shell.execute_reply": "2024-11-05T05:11:06.357635Z" } }, "outputs": [], diff --git a/docs/backend/openai_api_embeddings.ipynb b/docs/backend/openai_api_embeddings.ipynb index 54b48d60c..a221c16eb 100644 --- a/docs/backend/openai_api_embeddings.ipynb +++ b/docs/backend/openai_api_embeddings.ipynb @@ -35,10 +35,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:47:32.337369Z", - "iopub.status.busy": "2024-11-01T02:47:32.337032Z", - "iopub.status.idle": "2024-11-01T02:47:59.540926Z", - "shell.execute_reply": "2024-11-01T02:47:59.539861Z" + "iopub.execute_input": "2024-11-05T05:22:17.227174Z", + "iopub.status.busy": "2024-11-05T05:22:17.226952Z", + "iopub.status.idle": "2024-11-05T05:22:42.445791Z", + "shell.execute_reply": "2024-11-05T05:22:42.444980Z" } }, "outputs": [], @@ -72,10 +72,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:47:59.543958Z", - "iopub.status.busy": "2024-11-01T02:47:59.543670Z", - "iopub.status.idle": "2024-11-01T02:47:59.591699Z", - "shell.execute_reply": "2024-11-01T02:47:59.590809Z" + "iopub.execute_input": "2024-11-05T05:22:42.448147Z", + "iopub.status.busy": "2024-11-05T05:22:42.447775Z", + "iopub.status.idle": "2024-11-05T05:22:42.495311Z", + "shell.execute_reply": "2024-11-05T05:22:42.495027Z" } }, "outputs": [], @@ -104,7 +104,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:22:42.496666Z", + "iopub.status.busy": "2024-11-05T05:22:42.496524Z", + "iopub.status.idle": "2024-11-05T05:22:42.540687Z", + "shell.execute_reply": "2024-11-05T05:22:42.540060Z" + } + }, "outputs": [], "source": [ "import requests\n", @@ -133,10 +140,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:47:59.594229Z", - "iopub.status.busy": "2024-11-01T02:47:59.594049Z", - "iopub.status.idle": "2024-11-01T02:48:00.006233Z", - "shell.execute_reply": "2024-11-01T02:48:00.005255Z" + "iopub.execute_input": "2024-11-05T05:22:42.542551Z", + "iopub.status.busy": "2024-11-05T05:22:42.542282Z", + "iopub.status.idle": "2024-11-05T05:22:42.928542Z", + "shell.execute_reply": "2024-11-05T05:22:42.928181Z" } }, "outputs": [], @@ -169,10 +176,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:48:00.008858Z", - "iopub.status.busy": "2024-11-01T02:48:00.008689Z", - "iopub.status.idle": "2024-11-01T02:48:01.872542Z", - "shell.execute_reply": "2024-11-01T02:48:01.871573Z" + "iopub.execute_input": "2024-11-05T05:22:42.930093Z", + "iopub.status.busy": "2024-11-05T05:22:42.929954Z", + "iopub.status.idle": "2024-11-05T05:22:44.799945Z", + "shell.execute_reply": "2024-11-05T05:22:44.799562Z" } }, "outputs": [], @@ -201,10 +208,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:48:01.875204Z", - "iopub.status.busy": "2024-11-01T02:48:01.874915Z", - "iopub.status.idle": "2024-11-01T02:48:02.193734Z", - "shell.execute_reply": "2024-11-01T02:48:02.192158Z" + "iopub.execute_input": "2024-11-05T05:22:44.801418Z", + "iopub.status.busy": "2024-11-05T05:22:44.801192Z", + "iopub.status.idle": "2024-11-05T05:22:45.094634Z", + "shell.execute_reply": "2024-11-05T05:22:45.093950Z" } }, "outputs": [], diff --git a/docs/backend/openai_api_vision.ipynb b/docs/backend/openai_api_vision.ipynb index eb06e55ed..cbbba8c12 100644 --- a/docs/backend/openai_api_vision.ipynb +++ b/docs/backend/openai_api_vision.ipynb @@ -37,7 +37,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:22:49.320999Z", + "iopub.status.busy": "2024-11-05T05:22:49.320880Z", + "iopub.status.idle": "2024-11-05T05:23:21.537478Z", + "shell.execute_reply": "2024-11-05T05:23:21.536956Z" + } + }, "outputs": [], "source": [ "from sglang.utils import (\n", @@ -69,7 +76,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:23:21.539953Z", + "iopub.status.busy": "2024-11-05T05:23:21.539100Z", + "iopub.status.idle": "2024-11-05T05:23:25.880179Z", + "shell.execute_reply": "2024-11-05T05:23:25.879744Z" + } + }, "outputs": [], "source": [ "import subprocess\n", @@ -113,7 +127,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:23:25.881742Z", + "iopub.status.busy": "2024-11-05T05:23:25.881595Z", + "iopub.status.idle": "2024-11-05T05:23:26.758503Z", + "shell.execute_reply": "2024-11-05T05:23:26.758084Z" + } + }, "outputs": [], "source": [ "import requests\n", @@ -153,7 +174,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:23:26.760098Z", + "iopub.status.busy": "2024-11-05T05:23:26.759955Z", + "iopub.status.idle": "2024-11-05T05:23:27.849510Z", + "shell.execute_reply": "2024-11-05T05:23:27.849117Z" + } + }, "outputs": [], "source": [ "from openai import OpenAI\n", @@ -197,7 +225,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:23:27.850994Z", + "iopub.status.busy": "2024-11-05T05:23:27.850864Z", + "iopub.status.idle": "2024-11-05T05:23:31.609137Z", + "shell.execute_reply": "2024-11-05T05:23:31.608748Z" + } + }, "outputs": [], "source": [ "from openai import OpenAI\n", @@ -238,8 +273,15 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:23:31.610683Z", + "iopub.status.busy": "2024-11-05T05:23:31.610560Z", + "iopub.status.idle": "2024-11-05T05:23:32.965146Z", + "shell.execute_reply": "2024-11-05T05:23:32.963922Z" + } + }, "outputs": [], "source": [ "terminate_process(embedding_process)" diff --git a/docs/start/send_request.ipynb b/docs/start/send_request.ipynb index 24d39bd71..30aafbe90 100644 --- a/docs/start/send_request.ipynb +++ b/docs/start/send_request.ipynb @@ -9,7 +9,7 @@ "\n", "- For Vision Language Models, see [OpenAI APIs - Vision](../backend/openai_api_vision.ipynb).\n", "- For Embedding Models, see [OpenAI APIs - Embedding](../backend/openai_api_embeddings.ipynb) and [Encode (embedding model)](../backend/native_api.html#Encode-(embedding-model)).\n", - "- For Reward Models, see [Judge (reward model)](../backend/native_api.html#Judge-(reward-model))." + "- For Reward Models, see [Classify (reward model)](../backend/native_api.html#Classify-(reward-model))." ] }, { @@ -33,10 +33,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:46:13.611212Z", - "iopub.status.busy": "2024-11-01T02:46:13.611093Z", - "iopub.status.idle": "2024-11-01T02:46:42.810261Z", - "shell.execute_reply": "2024-11-01T02:46:42.809147Z" + "iopub.execute_input": "2024-11-05T05:11:10.680191Z", + "iopub.status.busy": "2024-11-05T05:11:10.679710Z", + "iopub.status.idle": "2024-11-05T05:11:39.882385Z", + "shell.execute_reply": "2024-11-05T05:11:39.881827Z" } }, "outputs": [], @@ -68,7 +68,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:11:39.883923Z", + "iopub.status.busy": "2024-11-05T05:11:39.883721Z", + "iopub.status.idle": "2024-11-05T05:11:40.124980Z", + "shell.execute_reply": "2024-11-05T05:11:40.124557Z" + } + }, "outputs": [], "source": [ "import subprocess, json\n", @@ -94,10 +101,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:46:42.813656Z", - "iopub.status.busy": "2024-11-01T02:46:42.813354Z", - "iopub.status.idle": "2024-11-01T02:46:51.436613Z", - "shell.execute_reply": "2024-11-01T02:46:51.435965Z" + "iopub.execute_input": "2024-11-05T05:11:40.126564Z", + "iopub.status.busy": "2024-11-05T05:11:40.126369Z", + "iopub.status.idle": "2024-11-05T05:11:40.324316Z", + "shell.execute_reply": "2024-11-05T05:11:40.323693Z" } }, "outputs": [], @@ -129,10 +136,10 @@ "execution_count": null, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:46:51.439372Z", - "iopub.status.busy": "2024-11-01T02:46:51.439178Z", - "iopub.status.idle": "2024-11-01T02:46:52.895776Z", - "shell.execute_reply": "2024-11-01T02:46:52.895318Z" + "iopub.execute_input": "2024-11-05T05:11:40.327043Z", + "iopub.status.busy": "2024-11-05T05:11:40.326759Z", + "iopub.status.idle": "2024-11-05T05:11:41.687336Z", + "shell.execute_reply": "2024-11-05T05:11:41.686855Z" } }, "outputs": [], @@ -162,7 +169,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:11:41.688676Z", + "iopub.status.busy": "2024-11-05T05:11:41.688527Z", + "iopub.status.idle": "2024-11-05T05:11:42.717140Z", + "shell.execute_reply": "2024-11-05T05:11:42.716452Z" + } + }, "outputs": [], "source": [ "import openai\n", @@ -198,7 +212,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:11:42.720467Z", + "iopub.status.busy": "2024-11-05T05:11:42.720182Z", + "iopub.status.idle": "2024-11-05T05:11:43.480765Z", + "shell.execute_reply": "2024-11-05T05:11:43.480143Z" + } + }, "outputs": [], "source": [ "import requests\n", @@ -227,7 +248,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-05T05:11:43.483575Z", + "iopub.status.busy": "2024-11-05T05:11:43.483295Z", + "iopub.status.idle": "2024-11-05T05:11:44.242950Z", + "shell.execute_reply": "2024-11-05T05:11:44.242248Z" + } + }, "outputs": [], "source": [ "import requests, json\n", @@ -262,10 +290,10 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T02:46:52.898411Z", - "iopub.status.busy": "2024-11-01T02:46:52.898149Z", - "iopub.status.idle": "2024-11-01T02:46:54.398382Z", - "shell.execute_reply": "2024-11-01T02:46:54.397564Z" + "iopub.execute_input": "2024-11-05T05:11:44.245660Z", + "iopub.status.busy": "2024-11-05T05:11:44.245373Z", + "iopub.status.idle": "2024-11-05T05:11:45.591682Z", + "shell.execute_reply": "2024-11-05T05:11:45.591184Z" } }, "outputs": [], diff --git a/examples/runtime/reward_model.py b/examples/runtime/reward_model.py index a18417df7..1a1177e66 100644 --- a/examples/runtime/reward_model.py +++ b/examples/runtime/reward_model.py @@ -24,7 +24,7 @@ json_data = { ], } response = requests.post( - url + "/judge", + url + "/classify", json=json_data, ).json() diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index d96176b2d..c881ba395 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -253,7 +253,7 @@ app.post("/encode")(encode_request) app.put("/encode")(encode_request) -async def judge_request(obj: EmbeddingReqInput, request: Request): +async def classify_request(obj: EmbeddingReqInput, request: Request): """Handle a reward model request. Now the arguments and return values are the same as embedding models.""" try: ret = await tokenizer_manager.generate_request(obj, request).__anext__() @@ -264,8 +264,8 @@ async def judge_request(obj: EmbeddingReqInput, request: Request): ) -app.post("/judge")(judge_request) -app.put("/judge")(judge_request) +app.post("/classify")(classify_request) +app.put("/classify")(classify_request) @app.post("/v1/completions")