Remove inf value for chunked prefill size (#812)

2024-07-29 18:34:25 -07:00
parent c8e9fed87a
commit 3520f75fb1
2 changed files with 6 additions and 5 deletions
--- a/python/sglang/srt/managers/controller/tp_worker.py
+++ b/python/sglang/srt/managers/controller/tp_worker.py
@@ -442,8 +442,11 @@ class ModelTpServer:
                else:
                    # Add this request to the running batch
                    if (
-                        new_batch_input_tokens + req.extend_input_len
-                        <= self.chunked_prefill_size
+                        self.chunked_prefill_size is None
+                        or (
+                            new_batch_input_tokens + req.extend_input_len
+                            <= self.chunked_prefill_size
+                        )
                        or (
                            req.return_logprob and req.normalized_prompt_logprob is None
                        )
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -87,8 +87,6 @@ class ServerArgs:
    node_rank: Optional[int] = None

    def __post_init__(self):
-        if self.chunked_prefill_size is None:
-            self.chunked_prefill_size = 1 << 30
        if self.tokenizer_path is None:
            self.tokenizer_path = self.model_path
        if self.mem_fraction_static is None:
@@ -414,7 +412,7 @@ class ServerArgs:
        ), "multi-node data parallel is not supported"

        assert not (
-            self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
+            self.chunked_prefill_size is not None and self.disable_radix_cache
        ), "chunked prefill is not supported with radix cache disabled currently"