[bugfix] Fix missing args in bench one batch (#8877)
This commit is contained in:
@@ -61,6 +61,7 @@ from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.distributed.parallel_state import destroy_distributed_environment
from sglang.srt.entrypoints.engine import _set_envs_and_config
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.layers.moe.utils import DeepEPMode, MoeA2ABackend
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
from sglang.srt.managers.scheduler import Scheduler
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
@@ -300,6 +301,11 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
    disable_cuda_graph=model_runner.server_args.disable_cuda_graph,
    spec_algorithm=SpeculativeAlgorithm.NONE,
    speculative_num_draft_tokens=None,
    enable_two_batch_overlap=model_runner.server_args.enable_two_batch_overlap,
    enable_deepep_moe=MoeA2ABackend(
        model_runner.server_args.moe_a2a_backend
    ).is_deepep(),
    deepep_mode=DeepEPMode(model_runner.server_args.deepep_mode),
    require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args),
    disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule,
)
Reference in New Issue
Block a user