Reduce scheduler recv requests overhead (#8947)

2025-08-08 15:10:05 +08:00
parent 76915d68a8
commit 774b47f3f1
3 changed files with 54 additions and 0 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -249,6 +249,7 @@ class ServerArgs:
    enable_return_hidden_states: bool = False
    enable_triton_kernel_moe: bool = False
    enable_flashinfer_mxfp4_moe: bool = False
+    scheduler_recv_interval: int = 1

    # Debug tensor dumps
    debug_tensor_dump_output_folder: Optional[str] = None
@@ -1845,6 +1846,12 @@ class ServerArgs:
            action="store_true",
            help="Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
        )
+        parser.add_argument(
+            "--scheduler-recv-interval",
+            type=int,
+            default=ServerArgs.scheduler_recv_interval,
+            help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
+        )

        # Debug tensor dumps
        parser.add_argument(