[feat] Add session control (#2073)

2024-11-20 00:36:53 -08:00
parent 63a395b985
commit 5942dfc00a
8 changed files with 348 additions and 8 deletions
--- a/test/srt/test_session_id.py
+++ b/test/srt/test_session_id.py
@@ -0,0 +1,133 @@
+"""
+Copyright 2023-2024 SGLang Team
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+# FIXME: Make it a CI test
+
+import requests
+
+from sglang.srt.hf_transformers_utils import get_tokenizer
+
+url = "http://localhost:30000"
+
+# Open a session
+response = requests.post(
+    url + "/open_session",
+    json={"capacity_of_str_len": 1000},
+)
+session_id = response.json()
+print("session_id", session_id, "\n")
+
+# Prefill only
+prompt = "chunk 1"
+response = requests.post(
+    url + "/generate",
+    json={
+        "text": prompt,
+        "session_id": session_id,
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 0,
+        },
+    },
+)
+print(response.json(), "\n")
+session_id = response.json()["session_id"]
+
+# Generate
+prompt = "Chunk 2"
+response = requests.post(
+    url + "/generate",
+    json={
+        "text": prompt,
+        "session_id": session_id,
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 16,
+        },
+    },
+)
+print(response.json(), "\n")
+session_id = response.json()["session_id"]
+rid = response.json()["meta_info"]["id"]
+
+# Generate
+prompt = "Chunk 3"
+response = requests.post(
+    url + "/generate",
+    json={
+        "text": prompt,
+        "session_id": session_id,
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 2,
+        },
+    },
+)
+print(response.json(), "\n")
+session_id = response.json()["session_id"]
+rid_to_del = response.json()["meta_info"]["id"]
+
+# Interrupt and re-generate
+prompt = "Chunk 4"
+response = requests.post(
+    url + "/generate",
+    json={
+        "text": prompt,
+        "session_id": session_id,
+        "session_rid": rid,
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 16,
+        },
+    },
+)
+print(response.json(), "\n")
+session_id = response.json()["session_id"]
+
+# Query a session based on a deleted request, should see finish reason abort
+prompt = "Chunk 4"
+response = requests.post(
+    url + "/generate",
+    json={
+        "text": prompt,
+        "session_id": session_id,
+        "session_rid": rid_to_del,
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 16,
+        },
+    },
+)
+print(response.json(), "\n")
+
+# Close session
+ret = requests.post(
+    url + "/close_session",
+    json={"session_id": session_id},
+)
+print(ret, "\n")
+
+# Query a deleted session, should see finish reason abort
+prompt = "chunk 1"
+response = requests.post(
+    url + "/generate",
+    json={
+        "text": prompt,
+        "session_id": session_id,
+        "sampling_params": {
+            "temperature": 0,
+            "max_new_tokens": 0,
+        },
+    },
+)
+print(response.json(), "\n")