[feat] Add session control (#2073)
This commit is contained in:
133
test/srt/test_session_id.py
Normal file
133
test/srt/test_session_id.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Copyright 2023-2024 SGLang Team
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
# FIXME: Make it a CI test
|
||||
|
||||
import requests
|
||||
|
||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||
|
||||
url = "http://localhost:30000"
|
||||
|
||||
# Open a session
|
||||
response = requests.post(
|
||||
url + "/open_session",
|
||||
json={"capacity_of_str_len": 1000},
|
||||
)
|
||||
session_id = response.json()
|
||||
print("session_id", session_id, "\n")
|
||||
|
||||
# Prefill only
|
||||
prompt = "chunk 1"
|
||||
response = requests.post(
|
||||
url + "/generate",
|
||||
json={
|
||||
"text": prompt,
|
||||
"session_id": session_id,
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 0,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json(), "\n")
|
||||
session_id = response.json()["session_id"]
|
||||
|
||||
# Generate
|
||||
prompt = "Chunk 2"
|
||||
response = requests.post(
|
||||
url + "/generate",
|
||||
json={
|
||||
"text": prompt,
|
||||
"session_id": session_id,
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 16,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json(), "\n")
|
||||
session_id = response.json()["session_id"]
|
||||
rid = response.json()["meta_info"]["id"]
|
||||
|
||||
# Generate
|
||||
prompt = "Chunk 3"
|
||||
response = requests.post(
|
||||
url + "/generate",
|
||||
json={
|
||||
"text": prompt,
|
||||
"session_id": session_id,
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 2,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json(), "\n")
|
||||
session_id = response.json()["session_id"]
|
||||
rid_to_del = response.json()["meta_info"]["id"]
|
||||
|
||||
# Interrupt and re-generate
|
||||
prompt = "Chunk 4"
|
||||
response = requests.post(
|
||||
url + "/generate",
|
||||
json={
|
||||
"text": prompt,
|
||||
"session_id": session_id,
|
||||
"session_rid": rid,
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 16,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json(), "\n")
|
||||
session_id = response.json()["session_id"]
|
||||
|
||||
# Query a session based on a deleted request, should see finish reason abort
|
||||
prompt = "Chunk 4"
|
||||
response = requests.post(
|
||||
url + "/generate",
|
||||
json={
|
||||
"text": prompt,
|
||||
"session_id": session_id,
|
||||
"session_rid": rid_to_del,
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 16,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json(), "\n")
|
||||
|
||||
# Close session
|
||||
ret = requests.post(
|
||||
url + "/close_session",
|
||||
json={"session_id": session_id},
|
||||
)
|
||||
print(ret, "\n")
|
||||
|
||||
# Query a deleted session, should see finish reason abort
|
||||
prompt = "chunk 1"
|
||||
response = requests.post(
|
||||
url + "/generate",
|
||||
json={
|
||||
"text": prompt,
|
||||
"session_id": session_id,
|
||||
"sampling_params": {
|
||||
"temperature": 0,
|
||||
"max_new_tokens": 0,
|
||||
},
|
||||
},
|
||||
)
|
||||
print(response.json(), "\n")
|
||||
Reference in New Issue
Block a user