From 3520f75fb14d1932fa226aea534937cc87c1b819 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Mon, 29 Jul 2024 18:34:25 -0700 Subject: [PATCH] Remove inf value for chunked prefill size (#812) --- python/sglang/srt/managers/controller/tp_worker.py | 7 +++++-- python/sglang/srt/server_args.py | 4 +--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/sglang/srt/managers/controller/tp_worker.py b/python/sglang/srt/managers/controller/tp_worker.py index abd933075..a688c53e3 100644 --- a/python/sglang/srt/managers/controller/tp_worker.py +++ b/python/sglang/srt/managers/controller/tp_worker.py @@ -442,8 +442,11 @@ class ModelTpServer: else: # Add this request to the running batch if ( - new_batch_input_tokens + req.extend_input_len - <= self.chunked_prefill_size + self.chunked_prefill_size is None + or ( + new_batch_input_tokens + req.extend_input_len + <= self.chunked_prefill_size + ) or ( req.return_logprob and req.normalized_prompt_logprob is None ) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 69829a7fc..8b3de98e2 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -87,8 +87,6 @@ class ServerArgs: node_rank: Optional[int] = None def __post_init__(self): - if self.chunked_prefill_size is None: - self.chunked_prefill_size = 1 << 30 if self.tokenizer_path is None: self.tokenizer_path = self.model_path if self.mem_fraction_static is None: @@ -414,7 +412,7 @@ class ServerArgs: ), "multi-node data parallel is not supported" assert not ( - self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache + self.chunked_prefill_size is not None and self.disable_radix_cache ), "chunked prefill is not supported with radix cache disabled currently"