Reduce scheduler recv requests overhead (#8947)
This commit is contained in:
@@ -249,6 +249,7 @@ class ServerArgs:
     enable_return_hidden_states: bool = False
     enable_triton_kernel_moe: bool = False
     enable_flashinfer_mxfp4_moe: bool = False
+    scheduler_recv_interval: int = 1

     # Debug tensor dumps
     debug_tensor_dump_output_folder: Optional[str] = None
@@ -1845,6 +1846,12 @@ class ServerArgs:
             action="store_true",
             help="Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
         )
+        parser.add_argument(
+            "--scheduler-recv-interval",
+            type=int,
+            default=ServerArgs.scheduler_recv_interval,
+            help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
+        )

         # Debug tensor dumps
         parser.add_argument(
Reference in New Issue
Block a user