From 42f34437abeba34b6bc9a1bf7f6875d27d2dd912 Mon Sep 17 00:00:00 2001
From: pranavm-nvidia <49246958+pranavm-nvidia@users.noreply.github.com>
Date: Fri, 29 Aug 2025 17:29:32 -0700
Subject: [PATCH] Adds initialize_moe_config to bench_one_batch so MOE backend is respected (#9670)

---
 python/sglang/bench_one_batch.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py
index aa43bb027..ebd461ec3 100644
--- a/python/sglang/bench_one_batch.py
+++ b/python/sglang/bench_one_batch.py
@@ -61,6 +61,7 @@ from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.distributed.parallel_state import destroy_distributed_environment
 from sglang.srt.entrypoints.engine import _set_envs_and_config
 from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.layers.moe import initialize_moe_config
 from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
 from sglang.srt.managers.scheduler import Scheduler
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
@@ -509,6 +510,8 @@ def latency_test(
     bench_args,
     tp_rank,
 ):
+    initialize_moe_config(server_args)
+
     # Set CPU affinity
     if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
         set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, tp_rank)