[Fix] Fix update_aclgraph_sizes when running MoE models (#913)

### What this PR does / why we need it?
Fix `update_aclgraph_sizes` when running MoE models: initialize Ascend model parallelism from the expert parallel sizes already carried by `parallel_config`, instead of re-deriving `expert_tensor_parallel_size` from `additional_config` in each worker.
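In short: both NPU workers previously re-derived `expert_tensor_parallel_size` from `additional_config`, each with slightly different fallback logic, and passed it to `init_ascend_model_parallel` alongside the tensor/pipeline parallel sizes. A simplified before/after sketch, condensed from the diff below (not verbatim code):

```python
# Before (simplified): each worker re-derived the value from additional_config.
expert_tensor_parallel_size = 1
if additional_config:
    expert_tensor_parallel_size = additional_config.get(
        "expert_tensor_parallel_size", 1)
init_ascend_model_parallel(parallel_config.tensor_parallel_size,
                           parallel_config.pipeline_parallel_size,
                           expert_tensor_parallel_size)

# After: both workers read the sizes straight from parallel_config.
init_ascend_model_parallel(
    parallel_config.expert_parallel_size,
    parallel_config.expert_tensor_parallel_size,
    parallel_config.world_size,
)
```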

---------

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
Author: yiz-liu
Date: 2025-05-30 15:17:11 +08:00
Committed by: GitHub
Parent: 3442fbdb23
Commit: 5a1689fc64
5 changed files with 47 additions and 35 deletions


@@ -534,7 +534,6 @@ class NPUWorker(LocalOrDistributedWorkerBase):
                                  backend: str = "hccl") -> None:
         """Initialize the distributed environment."""
         parallel_config = self.parallel_config
-        additional_config = self.vllm_config.additional_config
         set_custom_all_reduce(not parallel_config.disable_custom_all_reduce)
         init_distributed_environment(parallel_config.world_size, rank,
                                      distributed_init_method, local_rank,
@@ -542,13 +541,11 @@ class NPUWorker(LocalOrDistributedWorkerBase):
         ensure_model_parallel_initialized(
             parallel_config.tensor_parallel_size,
             parallel_config.pipeline_parallel_size)
-        expert_tensor_parallel_size = 1
-        if additional_config:
-            expert_tensor_parallel_size = additional_config.get(
-                "expert_tensor_parallel_size", 1)
-        init_ascend_model_parallel(parallel_config.tensor_parallel_size,
-                                   parallel_config.pipeline_parallel_size,
-                                   expert_tensor_parallel_size)
+        init_ascend_model_parallel(
+            parallel_config.expert_parallel_size,
+            parallel_config.expert_tensor_parallel_size,
+            parallel_config.world_size,
+        )
         ensure_kv_transfer_initialized(vllm_config)


@@ -234,7 +234,6 @@ class NPUWorker(WorkerBase):
     def _init_worker_distributed_environment(self) -> None:
         """Initialize the distributed environment."""
-        additional_config = self.vllm_config.additional_config
         parallel_config = self.vllm_config.parallel_config
         set_custom_all_reduce(
             not self.parallel_config.disable_custom_all_reduce)
@@ -244,13 +243,11 @@ class NPUWorker(WorkerBase):
         ensure_model_parallel_initialized(
             self.parallel_config.tensor_parallel_size,
             self.parallel_config.pipeline_parallel_size)
-        expert_tensor_parallel_size = 1
-        if additional_config is not None and "expert_tensor_parallel_size" in additional_config:
-            expert_tensor_parallel_size = int(
-                additional_config["expert_tensor_parallel_size"])
-        init_ascend_model_parallel(parallel_config.tensor_parallel_size,
-                                   parallel_config.pipeline_parallel_size,
-                                   expert_tensor_parallel_size)
+        init_ascend_model_parallel(
+            parallel_config.expert_parallel_size,
+            parallel_config.expert_tensor_parallel_size,
+            parallel_config.world_size,
+        )
         ensure_kv_transfer_initialized(self.vllm_config)

     def _init_profiler(self):
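
For reference, a minimal runnable sketch of the new call shape with a stub config object. `StubParallelConfig` and the body of the stub initializer are hypothetical; only the three attribute names and their order come from the diff above:

```python
from dataclasses import dataclass

@dataclass
class StubParallelConfig:
    # Stand-in for vLLM's ParallelConfig; only the fields the new
    # call reads are modeled here.
    expert_parallel_size: int = 2
    expert_tensor_parallel_size: int = 1
    world_size: int = 2

def init_ascend_model_parallel(expert_parallel_size: int,
                               expert_tensor_parallel_size: int,
                               world_size: int) -> None:
    # Stand-in for vllm_ascend's initializer; the real one sets up the
    # expert-parallel process groups. This sanity check is illustrative.
    assert expert_parallel_size * expert_tensor_parallel_size <= world_size

pc = StubParallelConfig()
init_ascend_model_parallel(
    pc.expert_parallel_size,
    pc.expert_tensor_parallel_size,
    pc.world_size,
)
```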