[CI] Minor fix for CI (#2187)
This commit is contained in:
@@ -17,12 +17,12 @@ dependencies = ["requests", "tqdm", "numpy", "IPython"]
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
runtime_common = ["aiohttp", "decord", "fastapi",
|
runtime_common = ["aiohttp", "decord", "fastapi",
|
||||||
"hf_transfer", "huggingface_hub", "interegular",
|
"hf_transfer", "huggingface_hub", "interegular", "modelscope",
|
||||||
"orjson", "outlines>=0.0.44,<0.1.0",
|
"orjson", "outlines>=0.0.44,<0.1.0",
|
||||||
"packaging", "pillow", "prometheus-client>=0.20.0",
|
"packaging", "pillow", "prometheus-client>=0.20.0",
|
||||||
"psutil", "pydantic", "python-multipart",
|
"psutil", "pydantic", "python-multipart",
|
||||||
"pyzmq>=25.1.2", "torchao", "uvicorn", "uvloop",
|
"pyzmq>=25.1.2", "torchao", "uvicorn", "uvloop",
|
||||||
"modelscope", "xgrammar==0.1.4"]
|
"xgrammar>=0.1.4"]
|
||||||
srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1"]
|
srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1"]
|
||||||
|
|
||||||
# HIP (Heterogeneous-computing Interface for Portability) for AMD
|
# HIP (Heterogeneous-computing Interface for Portability) for AMD
|
||||||
|
|||||||
@@ -526,7 +526,7 @@ class Scheduler:
|
|||||||
recv_req: TokenizedGenerateReqInput,
|
recv_req: TokenizedGenerateReqInput,
|
||||||
):
|
):
|
||||||
if recv_req.session_id is None or recv_req.session_id not in self.sessions:
|
if recv_req.session_id is None or recv_req.session_id not in self.sessions:
|
||||||
# Check if input_embeds is present and create dummy input_ids
|
# Create a new request
|
||||||
if recv_req.input_embeds is not None:
|
if recv_req.input_embeds is not None:
|
||||||
# Generate fake input_ids based on the length of input_embeds
|
# Generate fake input_ids based on the length of input_embeds
|
||||||
seq_length = len(recv_req.input_embeds)
|
seq_length = len(recv_req.input_embeds)
|
||||||
@@ -542,6 +542,7 @@ class Scheduler:
|
|||||||
input_embeds=recv_req.input_embeds,
|
input_embeds=recv_req.input_embeds,
|
||||||
)
|
)
|
||||||
req.tokenizer = self.tokenizer
|
req.tokenizer = self.tokenizer
|
||||||
|
|
||||||
if recv_req.session_id is not None:
|
if recv_req.session_id is not None:
|
||||||
req.finished_reason = FINISH_ABORT(
|
req.finished_reason = FINISH_ABORT(
|
||||||
f"Invalid request: session id {recv_req.session_id} does not exist"
|
f"Invalid request: session id {recv_req.session_id} does not exist"
|
||||||
@@ -549,7 +550,7 @@ class Scheduler:
|
|||||||
self.waiting_queue.append(req)
|
self.waiting_queue.append(req)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
# Handle sessions
|
# Create a new request from a previous session
|
||||||
session = self.sessions[recv_req.session_id]
|
session = self.sessions[recv_req.session_id]
|
||||||
req = session.create_req(recv_req, self.tokenizer)
|
req = session.create_req(recv_req, self.tokenizer)
|
||||||
if isinstance(req.finished_reason, FINISH_ABORT):
|
if isinstance(req.finished_reason, FINISH_ABORT):
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ suites = {
|
|||||||
"test_radix_attention.py",
|
"test_radix_attention.py",
|
||||||
"test_retract_decode.py",
|
"test_retract_decode.py",
|
||||||
"test_server_args.py",
|
"test_server_args.py",
|
||||||
|
"test_session_control.py",
|
||||||
"test_skip_tokenizer_init.py",
|
"test_skip_tokenizer_init.py",
|
||||||
"test_srt_engine.py",
|
"test_srt_engine.py",
|
||||||
"test_srt_endpoint.py",
|
"test_srt_endpoint.py",
|
||||||
@@ -35,7 +36,6 @@ suites = {
|
|||||||
"test_triton_attention_backend.py",
|
"test_triton_attention_backend.py",
|
||||||
"test_update_weights.py",
|
"test_update_weights.py",
|
||||||
"test_vision_openai_server.py",
|
"test_vision_openai_server.py",
|
||||||
"test_session_control.py",
|
|
||||||
],
|
],
|
||||||
"sampling/penaltylib": glob.glob(
|
"sampling/penaltylib": glob.glob(
|
||||||
"sampling/penaltylib/**/test_*.py", recursive=True
|
"sampling/penaltylib/**/test_*.py", recursive=True
|
||||||
|
|||||||
Reference in New Issue
Block a user