Change judge to classify & Modify make file (#1920)

2024-11-04 23:53:44 -08:00
parent 463d56bf44
commit 02755768d3
9 changed files with 369 additions and 124 deletions
--- a/docs/backend/native_api.ipynb
+++ b/docs/backend/native_api.ipynb
@@ -17,7 +17,7 @@
    "- `/get_memory_pool_size`\n",
    "- `/update_weights`\n",
    "- `/encode`(embedding model)\n",
-    "- `/judge`(reward model)\n",
+    "- `/classify` (reward model)\n",
    "\n",
    "We mainly use `requests` to test these APIs in the following examples. You can also use `curl`."
   ]
@@ -32,7 +32,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:08.536886Z",
+     "iopub.status.busy": "2024-11-05T05:08:08.536763Z",
+     "iopub.status.idle": "2024-11-05T05:08:34.725831Z",
+     "shell.execute_reply": "2024-11-05T05:08:34.725316Z"
+    }
+   },
   "outputs": [],
   "source": [
    "from sglang.utils import (\n",
@@ -64,7 +71,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:34.727530Z",
+     "iopub.status.busy": "2024-11-05T05:08:34.727333Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.359784Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.359090Z"
+    }
+   },
   "outputs": [],
   "source": [
    "url = \"http://localhost:30010/generate\"\n",
@@ -85,7 +99,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.362286Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.362140Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.368711Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.368220Z"
+    }
+   },
   "outputs": [],
   "source": [
    "url = \"http://localhost:30010/get_server_args\"\n",
@@ -109,7 +130,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.371313Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.370877Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.376712Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.376230Z"
+    }
+   },
   "outputs": [],
   "source": [
    "url = \"http://localhost:30010/get_model_info\"\n",
@@ -134,7 +162,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.378982Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.378597Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.391820Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.391336Z"
+    }
+   },
   "outputs": [],
   "source": [
    "url = \"http://localhost:30010/health_generate\"\n",
@@ -146,7 +181,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.393748Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.393606Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.398645Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.398145Z"
+    }
+   },
   "outputs": [],
   "source": [
    "url = \"http://localhost:30010/health\"\n",
@@ -167,7 +209,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.400683Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.400419Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.406146Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.405661Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# flush cache\n",
@@ -190,7 +239,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.408176Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.407884Z",
+     "iopub.status.idle": "2024-11-05T05:08:35.413587Z",
+     "shell.execute_reply": "2024-11-05T05:08:35.413108Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# get_memory_pool_size\n",
@@ -213,7 +269,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:35.416090Z",
+     "iopub.status.busy": "2024-11-05T05:08:35.415793Z",
+     "iopub.status.idle": "2024-11-05T05:08:36.552549Z",
+     "shell.execute_reply": "2024-11-05T05:08:36.551870Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# successful update with same architecture and size\n",
@@ -231,7 +294,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:36.554823Z",
+     "iopub.status.busy": "2024-11-05T05:08:36.554680Z",
+     "iopub.status.idle": "2024-11-05T05:08:38.053945Z",
+     "shell.execute_reply": "2024-11-05T05:08:38.053034Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# failed update with different parameter size\n",
@@ -263,7 +333,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:08:38.056783Z",
+     "iopub.status.busy": "2024-11-05T05:08:38.056497Z",
+     "iopub.status.idle": "2024-11-05T05:09:04.436030Z",
+     "shell.execute_reply": "2024-11-05T05:09:04.435311Z"
+    }
+   },
   "outputs": [],
   "source": [
    "terminate_process(server_process)\n",
@@ -281,7 +358,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:09:04.438987Z",
+     "iopub.status.busy": "2024-11-05T05:09:04.438568Z",
+     "iopub.status.idle": "2024-11-05T05:09:04.485291Z",
+     "shell.execute_reply": "2024-11-05T05:09:04.484829Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# successful encode for embedding model\n",
@@ -298,15 +382,22 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Judge (reward model)\n",
+    "## Classify (reward model)\n",
    "\n",
-    "SGLang Runtime also supports reward models. Here we use a reward model to judge the quality of pairwise generations."
+    "SGLang Runtime also supports reward models. Here we use a reward model to classify pairwise generations by quality."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:09:04.487191Z",
+     "iopub.status.busy": "2024-11-05T05:09:04.486929Z",
+     "iopub.status.idle": "2024-11-05T05:09:25.553481Z",
+     "shell.execute_reply": "2024-11-05T05:09:25.552747Z"
+    }
+   },
   "outputs": [],
   "source": [
    "terminate_process(embedding_process)\n",
@@ -326,7 +417,14 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:09:25.555813Z",
+     "iopub.status.busy": "2024-11-05T05:09:25.555666Z",
+     "iopub.status.idle": "2024-11-05T05:09:26.354372Z",
+     "shell.execute_reply": "2024-11-05T05:09:26.353693Z"
+    }
+   },
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer\n",
@@ -346,7 +444,7 @@
    "tokenizer = AutoTokenizer.from_pretrained(\"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\")\n",
    "prompts = tokenizer.apply_chat_template(CONVS, tokenize=False)\n",
    "\n",
-    "url = \"http://localhost:30030/judge\"\n",
+    "url = \"http://localhost:30030/classify\"\n",
    "data = {\n",
    "    \"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \n",
    "    \"text\": prompts\n",
@@ -360,7 +458,14 @@
  {
   "cell_type": "code",
   "execution_count": 15,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-11-05T05:09:26.356532Z",
+     "iopub.status.busy": "2024-11-05T05:09:26.356327Z",
+     "iopub.status.idle": "2024-11-05T05:09:26.396590Z",
+     "shell.execute_reply": "2024-11-05T05:09:26.395914Z"
+    }
+   },
   "outputs": [],
   "source": [
    "terminate_process(reward_process)"