diff --git a/python/sglang/srt/managers/controller/tp_worker.py b/python/sglang/srt/managers/controller/tp_worker.py
index abd933075..a688c53e3 100644
--- a/python/sglang/srt/managers/controller/tp_worker.py
+++ b/python/sglang/srt/managers/controller/tp_worker.py
@@ -442,8 +442,11 @@ class ModelTpServer:
             else:
                 # Add this request to the running batch
                 if (
-                    new_batch_input_tokens + req.extend_input_len
-                    <= self.chunked_prefill_size
+                    self.chunked_prefill_size is None
+                    or (
+                        new_batch_input_tokens + req.extend_input_len
+                        <= self.chunked_prefill_size
+                    )
                     or (
                         req.return_logprob and req.normalized_prompt_logprob is None
                     )
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 69829a7fc..8b3de98e2 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -87,8 +87,6 @@ class ServerArgs:
     node_rank: Optional[int] = None
 
     def __post_init__(self):
-        if self.chunked_prefill_size is None:
-            self.chunked_prefill_size = 1 << 30
         if self.tokenizer_path is None:
             self.tokenizer_path = self.model_path
         if self.mem_fraction_static is None:
@@ -414,7 +412,7 @@ class ServerArgs:
         ), "multi-node data parallel is not supported"
 
         assert not (
-            self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
+            self.chunked_prefill_size is not None and self.disable_radix_cache
         ), "chunked prefill is not supported with radix cache disabled currently"
 
 