From 52e1f52f328f656709feac0505fc12edae001871 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Sat, 9 Aug 2025 01:34:03 -0700 Subject: [PATCH] [bugfix] Fix missing args in bench one batch (#8877) --- python/sglang/bench_one_batch.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py index 3e94ec811..36530445a 100644 --- a/python/sglang/bench_one_batch.py +++ b/python/sglang/bench_one_batch.py @@ -61,6 +61,7 @@ from sglang.srt.configs.model_config import ModelConfig from sglang.srt.distributed.parallel_state import destroy_distributed_environment from sglang.srt.entrypoints.engine import _set_envs_and_config from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.layers.moe.utils import DeepEPMode, MoeA2ABackend from sglang.srt.managers.schedule_batch import Req, ScheduleBatch from sglang.srt.managers.scheduler import Scheduler from sglang.srt.model_executor.forward_batch_info import ForwardBatch @@ -300,6 +301,11 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner): disable_cuda_graph=model_runner.server_args.disable_cuda_graph, spec_algorithm=SpeculativeAlgorithm.NONE, speculative_num_draft_tokens=None, + enable_two_batch_overlap=model_runner.server_args.enable_two_batch_overlap, + enable_deepep_moe=MoeA2ABackend( + model_runner.server_args.moe_a2a_backend + ).is_deepep(), + deepep_mode=DeepEPMode(model_runner.server_args.deepep_mode), require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args), disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule, )