[bugfix] Fix missing args in bench one batch (#8877)
This commit is contained in:
@@ -61,6 +61,7 @@ from sglang.srt.configs.model_config import ModelConfig
 from sglang.srt.distributed.parallel_state import destroy_distributed_environment
 from sglang.srt.entrypoints.engine import _set_envs_and_config
 from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.layers.moe.utils import DeepEPMode, MoeA2ABackend
 from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
 from sglang.srt.managers.scheduler import Scheduler
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
@@ -300,6 +301,11 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
 disable_cuda_graph=model_runner.server_args.disable_cuda_graph,
 spec_algorithm=SpeculativeAlgorithm.NONE,
 speculative_num_draft_tokens=None,
+enable_two_batch_overlap=model_runner.server_args.enable_two_batch_overlap,
+enable_deepep_moe=MoeA2ABackend(
+model_runner.server_args.moe_a2a_backend
+).is_deepep(),
+deepep_mode=DeepEPMode(model_runner.server_args.deepep_mode),
 require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args),
 disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule,
 )
Reference in New Issue
Block a user