From 1078396f4784208d389943c6fd99c115aee70fe8 Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Fri, 11 Apr 2025 00:12:44 +0800 Subject: [PATCH] Update deps for mllama4 (#5215) --- python/pyproject.toml | 3 +- python/sglang/srt/managers/scheduler.py | 3 +- test/srt/test_vision_openai_server.py | 45 +++++++++++++------------ 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 9a5a33ca5..11ee0ed1b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -38,7 +38,7 @@ runtime_common = [ "pyzmq>=25.1.2", "soundfile==0.13.1", "torchao>=0.7.0", - "transformers==4.51.0", + "transformers==4.51.1", "uvicorn", "uvloop", "compressed-tensors", @@ -50,6 +50,7 @@ srt = [ "sgl-kernel==0.0.8", "flashinfer_python==0.2.3", "torch==2.5.1", + "torchvision==0.20.1", "cuda-python", "outlines>=0.0.44,<=0.1.11", "partial_json_parser", diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 383cd6809..359573fc6 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -840,7 +840,6 @@ class Scheduler( bootstrap_room=recv_req.bootstrap_room, ) req.tokenizer = self.tokenizer - req.queue_time_start = time.time() if ( recv_req.session_params is not None @@ -855,7 +854,6 @@ class Scheduler( # Create a new request from a previous session session = self.sessions[recv_req.session_params.id] req = session.create_req(recv_req, self.tokenizer) - req.queue_time_start = time.time() if isinstance(req.finished_reason, FINISH_ABORT): self._add_request_to_queue(req) return @@ -958,6 +956,7 @@ class Scheduler( self.disagg_decode_prealloc_queue.add(req) else: + req.queue_time_start = time.time() self.waiting_queue.append(req) def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False): diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index 40e0de5d2..0e40cfc15 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -682,29 +682,30 @@ class TestJanusProServer(TestOpenAIVisionServer): pass -class TestLlama4Server(TestOpenAIVisionServer): - @classmethod - def setUpClass(cls): - cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--chat-template", - "llama-4", - "--mem-fraction-static", - "0.8", - "--tp-size=8", - "--context-length=8192", - ], - ) - cls.base_url += "/v1" +## Skip for ci test +# class TestLlama4Server(TestOpenAIVisionServer): +# @classmethod +# def setUpClass(cls): +# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" +# cls.base_url = DEFAULT_URL_FOR_TEST +# cls.api_key = "sk-123456" +# cls.process = popen_launch_server( +# cls.model, +# cls.base_url, +# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, +# other_args=[ +# "--chat-template", +# "llama-4", +# "--mem-fraction-static", +# "0.8", +# "--tp-size=8", +# "--context-length=8192", +# ], +# ) +# cls.base_url += "/v1" - def test_video_chat_completion(self): - pass +# def test_video_chat_completion(self): +# pass class TestGemma3itServer(TestOpenAIVisionServer):