Remove inf value for chunked prefill size (#812)
This commit is contained in:
@@ -442,8 +442,11 @@ class ModelTpServer:
|
|||||||
else:
|
else:
|
||||||
# Add this request to the running batch
|
# Add this request to the running batch
|
||||||
if (
|
if (
|
||||||
new_batch_input_tokens + req.extend_input_len
|
self.chunked_prefill_size is None
|
||||||
<= self.chunked_prefill_size
|
or (
|
||||||
|
new_batch_input_tokens + req.extend_input_len
|
||||||
|
<= self.chunked_prefill_size
|
||||||
|
)
|
||||||
or (
|
or (
|
||||||
req.return_logprob and req.normalized_prompt_logprob is None
|
req.return_logprob and req.normalized_prompt_logprob is None
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -87,8 +87,6 @@ class ServerArgs:
|
|||||||
node_rank: Optional[int] = None
|
node_rank: Optional[int] = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.chunked_prefill_size is None:
|
|
||||||
self.chunked_prefill_size = 1 << 30
|
|
||||||
if self.tokenizer_path is None:
|
if self.tokenizer_path is None:
|
||||||
self.tokenizer_path = self.model_path
|
self.tokenizer_path = self.model_path
|
||||||
if self.mem_fraction_static is None:
|
if self.mem_fraction_static is None:
|
||||||
@@ -414,7 +412,7 @@ class ServerArgs:
|
|||||||
), "multi-node data parallel is not supported"
|
), "multi-node data parallel is not supported"
|
||||||
|
|
||||||
assert not (
|
assert not (
|
||||||
self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
|
self.chunked_prefill_size is not None and self.disable_radix_cache
|
||||||
), "chunked prefill is not supported with radix cache disabled currently"
|
), "chunked prefill is not supported with radix cache disabled currently"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user