diff --git a/python/pyproject.toml b/python/pyproject.toml index 4b6da31b3..feb721abb 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,12 +17,12 @@ dependencies = ["requests", "tqdm", "numpy", "IPython"] [project.optional-dependencies] runtime_common = ["aiohttp", "decord", "fastapi", - "hf_transfer", "huggingface_hub", "interegular", + "hf_transfer", "huggingface_hub", "interegular", "modelscope", "orjson", "outlines>=0.0.44,<0.1.0", "packaging", "pillow", "prometheus-client>=0.20.0", "psutil", "pydantic", "python-multipart", "pyzmq>=25.1.2", "torchao", "uvicorn", "uvloop", - "modelscope", "xgrammar==0.1.4"] + "xgrammar>=0.1.4"] srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1"] # HIP (Heterogeneous-computing Interface for Portability) for AMD diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 0994aeb59..35a064e5e 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -526,7 +526,7 @@ class Scheduler: recv_req: TokenizedGenerateReqInput, ): if recv_req.session_id is None or recv_req.session_id not in self.sessions: - # Check if input_embeds is present and create dummy input_ids + # Create a new request if recv_req.input_embeds is not None: # Generate fake input_ids based on the length of input_embeds seq_length = len(recv_req.input_embeds) @@ -542,6 +542,7 @@ class Scheduler: input_embeds=recv_req.input_embeds, ) req.tokenizer = self.tokenizer + if recv_req.session_id is not None: req.finished_reason = FINISH_ABORT( f"Invalid request: session id {recv_req.session_id} does not exist" @@ -549,7 +550,7 @@ class Scheduler: self.waiting_queue.append(req) return else: - # Handle sessions + # Create a new request from a previous session session = self.sessions[recv_req.session_id] req = session.create_req(recv_req, self.tokenizer) if isinstance(req.finished_reason, FINISH_ABORT): diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 
560c77c2a..6611330ae 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -25,6 +25,7 @@ suites = { "test_radix_attention.py", "test_retract_decode.py", "test_server_args.py", + "test_session_control.py", "test_skip_tokenizer_init.py", "test_srt_engine.py", "test_srt_endpoint.py", @@ -35,7 +36,6 @@ suites = { "test_triton_attention_backend.py", "test_update_weights.py", "test_vision_openai_server.py", - "test_session_control.py", ], "sampling/penaltylib": glob.glob( "sampling/penaltylib/**/test_*.py", recursive=True