[Fix] Fix update_aclgraph_sizes when running MoE models (#913)

### What this PR does / why we need it? Fix update_aclgraph_sizes when running MoE models. --------- Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
2025-05-30 15:17:11 +08:00
parent 3442fbdb23
commit 5a1689fc64
5 changed files with 47 additions and 35 deletions
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -119,6 +119,26 @@ class NPUPlatform(Platform):
        from vllm.config import CompilationLevel  # noqa: E402
        compilation_config = vllm_config.compilation_config
        model_config = vllm_config.model_config
+        additional_config = vllm_config.additional_config
+        parallel_config = vllm_config.parallel_config
+        cache_config = vllm_config.cache_config
+
+        if parallel_config:
+            # Default value for expert tensor parallel size
+            parallel_config.expert_tensor_parallel_size = parallel_config.tensor_parallel_size
+
+            # NOTE: When enable_expert_parallel is True, we follow vLLM convention:
+            # ep_size = world_size, which means expert_tensor_parallel_size must be 1
+            if (additional_config
+                    and "expert_tensor_parallel_size" in additional_config
+                    and not parallel_config.enable_expert_parallel):
+                parallel_config.expert_tensor_parallel_size = int(
+                    additional_config["expert_tensor_parallel_size"])
+
+            # Calculate expert parallel size based on world size
+            parallel_config.expert_parallel_size = (
+                parallel_config.world_size //
+                parallel_config.expert_tensor_parallel_size)

        if model_config is None:
            logger.warning("Model config is missing. This may indicate "
@@ -127,9 +147,9 @@ class NPUPlatform(Platform):
        else:
            enforce_eager = getattr(model_config, "enforce_eager", False)

-        if vllm_config.additional_config is not None:
-            enable_graph_mode = vllm_config.additional_config.get(
-                "enable_graph_mode", False)
+        if additional_config is not None:
+            enable_graph_mode = additional_config.get("enable_graph_mode",
+                                                      False)
            if enable_graph_mode:
                if enforce_eager:
                    raise RuntimeError(
@@ -139,7 +159,7 @@ class NPUPlatform(Platform):
                    logger.warning(
                        "NPU graph mode is still experimental and not supported for V1 without mla currently, "
                        "it has been disabled automatically.")
-                    vllm_config.additional_config["enable_graph_mode"] = False
+                    additional_config["enable_graph_mode"] = False
                if model_config:
                    model_type = model_config.hf_config.model_type
                    if "deepseek" not in model_type:
@@ -178,7 +198,6 @@ class NPUPlatform(Platform):
                ["vllm.unified_ascend_attention_with_output"])
            update_aclgraph_sizes(vllm_config)

-        parallel_config = vllm_config.parallel_config
        if parallel_config and parallel_config.worker_cls == "auto":
            if envs.VLLM_USE_V1:
                parallel_config.worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
@@ -190,7 +209,6 @@ class NPUPlatform(Platform):
            else:
                parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"

-        cache_config = vllm_config.cache_config
        if cache_config:
            if cache_config.block_size is None:
                cache_config.block_size = 128
@@ -202,11 +220,10 @@ class NPUPlatform(Platform):

        if envs.VLLM_USE_V1:
            # Activate custom ops for v1.
-            vllm_config.compilation_config.custom_ops = ["all"]
+            compilation_config.custom_ops = ["all"]
            # If ascend_scheduler_config exists in additional_config,
            # extents original scheduler_config to use AscendScheduler.

-            additional_config = vllm_config.additional_config
            if additional_config and additional_config.get(
                    "ascend_scheduler_config", None) is not None:
                additional_scheduler_config = additional_config.get(