[Fix] Fix update_aclgraph_sizes when running MoE models (#913)
### What this PR does / why we need it?
Fix `update_aclgraph_sizes` when running MoE models.

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
@@ -126,14 +126,16 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
     original_sizes, compilation_config.cudagraph_capture_sizes = \
         compilation_config.cudagraph_capture_sizes, None

-    # Calculate parallel configuration factor (increases with DP or TP)
+    # TODO(Yizhou): This is a temporary solution, need to be improved
+    # in the future, taking into account the other parallel configurations.
+    # Calculate parallel configuration factor
     num_hidden_layers = vllm_config.model_config.hf_config.num_hidden_layers
     parallel_config = vllm_config.parallel_config

+    # TODO: Find out whether we need to take into account the pp_size
     parallel_factor = 1 + sum(size > 1 for size in [
-        parallel_config.data_parallel_size,
-        parallel_config.tensor_parallel_size
+        parallel_config.data_parallel_size_local,
+        parallel_config.tensor_parallel_size,
+        parallel_config.expert_parallel_size,
+        parallel_config.expert_tensor_parallel_size,
     ])

     # Calculate maximum supported batch sizes considering model architecture
||||
Reference in New Issue
Block a user