Remove inf value for chunked prefill size (#812)

This commit is contained in:
Liangsheng Yin
2024-07-29 18:34:25 -07:00
committed by GitHub
parent c8e9fed87a
commit 3520f75fb1
2 changed files with 6 additions and 5 deletions

View File

@@ -442,8 +442,11 @@ class ModelTpServer:
 else:
     # Add this request to the running batch
     if (
-        new_batch_input_tokens + req.extend_input_len
-        <= self.chunked_prefill_size
+        self.chunked_prefill_size is None
+        or (
+            new_batch_input_tokens + req.extend_input_len
+            <= self.chunked_prefill_size
+        )
         or (
             req.return_logprob and req.normalized_prompt_logprob is None
         )

View File

@@ -87,8 +87,6 @@ class ServerArgs:
 node_rank: Optional[int] = None
 def __post_init__(self):
-    if self.chunked_prefill_size is None:
-        self.chunked_prefill_size = 1 << 30
     if self.tokenizer_path is None:
         self.tokenizer_path = self.model_path
     if self.mem_fraction_static is None:
@@ -414,7 +412,7 @@ class ServerArgs:
 ), "multi-node data parallel is not supported"
 assert not (
-    self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
+    self.chunked_prefill_size is not None and self.disable_radix_cache
 ), "chunked prefill is not supported with radix cache disabled currently"