Fix chunked prefill size validation for disabled state (#8973)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -1999,9 +1999,11 @@ class ServerArgs:
             ), "enable_mixed_chunk is required for speculative decoding"
 
         # Check chunked prefill
-        assert (
-            self.chunked_prefill_size % self.page_size == 0
-        ), "chunked_prefill_size must be divisible by page_size"
+        # Skip validation if chunked prefill is disabled (i.e., size <= 0).
+        if self.chunked_prefill_size > 0:
+            assert (
+                self.chunked_prefill_size % self.page_size == 0
+            ), "chunked_prefill_size must be divisible by page_size"
 
     def check_lora_server_args(self):
         assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"
Reference in New Issue
Block a user