Remove inf value for chunked prefill size (#812)
This commit is contained in:
@@ -442,8 +442,11 @@ class ModelTpServer:
|
||||
else:
|
||||
# Add this request to the running batch
|
||||
if (
|
||||
- new_batch_input_tokens + req.extend_input_len
|
||||
- <= self.chunked_prefill_size
|
||||
+ self.chunked_prefill_size is None
|
||||
+ or (
|
||||
+ new_batch_input_tokens + req.extend_input_len
|
||||
+ <= self.chunked_prefill_size
|
||||
+ )
|
||||
or (
|
||||
req.return_logprob and req.normalized_prompt_logprob is None
|
||||
)
|
||||
|
||||
@@ -87,8 +87,6 @@ class ServerArgs:
|
||||
node_rank: Optional[int] = None
|
||||
|
||||
def __post_init__(self):
|
||||
- if self.chunked_prefill_size is None:
|
||||
- self.chunked_prefill_size = 1 << 30
|
||||
if self.tokenizer_path is None:
|
||||
self.tokenizer_path = self.model_path
|
||||
if self.mem_fraction_static is None:
|
||||
@@ -414,7 +412,7 @@ class ServerArgs:
|
||||
), "multi-node data parallel is not supported"
|
||||
|
||||
assert not (
|
||||
- self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
|
||||
+ self.chunked_prefill_size is not None and self.disable_radix_cache
|
||||
), "chunked prefill is not supported with radix cache disabled currently"
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user