Reduce scheduler recv requests overhead (#8947)

This commit is contained in:
fzyzcjy
2025-08-08 15:10:05 +08:00
committed by GitHub
parent 76915d68a8
commit 774b47f3f1
3 changed files with 54 additions and 0 deletions

View File

@@ -249,6 +249,7 @@ class ServerArgs:
enable_return_hidden_states: bool = False
enable_triton_kernel_moe: bool = False
enable_flashinfer_mxfp4_moe: bool = False
# Interval at which the scheduler polls for incoming requests. The default of 1
# can be raised (>1) to reduce polling overhead; exposed on the CLI as
# --scheduler-recv-interval.
# NOTE(review): the unit of this interval is not visible here — confirm in the scheduler.
scheduler_recv_interval: int = 1
# Debug tensor dumps
debug_tensor_dump_output_folder: Optional[str] = None
@@ -1845,6 +1846,12 @@ class ServerArgs:
action="store_true",
help="Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
)
# Scheduler request polling
# The default is read from the ServerArgs.scheduler_recv_interval attribute so the
# CLI default follows the value declared on the class.
# NOTE(review): no range check is applied here (type=int accepts 0 or negatives);
# confirm whether the consumer validates the value.
parser.add_argument(
"--scheduler-recv-interval",
type=int,
default=ServerArgs.scheduler_recv_interval,
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
)
# Debug tensor dumps
parser.add_argument(