fix(server_args): Skip chunked_prefill_size validation when disaggregation mode is decode (#10358)
This commit is contained in:
@@ -391,7 +391,7 @@ class ServerArgs:
|
|||||||
debug_tensor_dump_prefill_only: bool = False
|
debug_tensor_dump_prefill_only: bool = False
|
||||||
|
|
||||||
# PD disaggregation: can be "null" (not disaggregated), "prefill" (prefill-only), or "decode" (decode-only)
|
# PD disaggregation: can be "null" (not disaggregated), "prefill" (prefill-only), or "decode" (decode-only)
|
||||||
disaggregation_mode: str = "null"
|
disaggregation_mode: Literal["null", "prefill", "decode"] = "null"
|
||||||
disaggregation_transfer_backend: str = "mooncake"
|
disaggregation_transfer_backend: str = "mooncake"
|
||||||
disaggregation_bootstrap_port: int = 8998
|
disaggregation_bootstrap_port: int = 8998
|
||||||
disaggregation_decode_tp: Optional[int] = None
|
disaggregation_decode_tp: Optional[int] = None
|
||||||
@@ -2252,7 +2252,7 @@ class ServerArgs:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--disaggregation-mode",
|
"--disaggregation-mode",
|
||||||
type=str,
|
type=str,
|
||||||
default="null",
|
default=ServerArgs.disaggregation_mode,
|
||||||
choices=["null", "prefill", "decode"],
|
choices=["null", "prefill", "decode"],
|
||||||
help='Only used for PD disaggregation. "prefill" for prefill-only server, and "decode" for decode-only server. If not specified, it is not PD disaggregated',
|
help='Only used for PD disaggregation. "prefill" for prefill-only server, and "decode" for decode-only server. If not specified, it is not PD disaggregated',
|
||||||
)
|
)
|
||||||
@@ -2436,7 +2436,8 @@ class ServerArgs:
|
|||||||
|
|
||||||
# Check chunked prefill
|
# Check chunked prefill
|
||||||
# Skip validation if chunked prefill is disabled (i.e., size <= 0).
|
# Skip validation if chunked prefill is disabled (i.e., size <= 0).
|
||||||
if self.chunked_prefill_size > 0:
|
# Skip validation if disaggregation mode is decode.
|
||||||
|
if self.chunked_prefill_size > 0 and self.disaggregation_mode != "decode":
|
||||||
assert (
|
assert (
|
||||||
self.chunked_prefill_size % self.page_size == 0
|
self.chunked_prefill_size % self.page_size == 0
|
||||||
), "chunked_prefill_size must be divisible by page_size"
|
), "chunked_prefill_size must be divisible by page_size"
|
||||||
|
|||||||
Reference in New Issue
Block a user