From dd55736ee4c4901961c1a04d9b8f6b94612f6686 Mon Sep 17 00:00:00 2001
From: Wangbei25
Date: Wed, 25 Mar 2026 23:23:37 +0800
Subject: [PATCH] fix incompatibility between fc1 and non-sp-padding (#7643)

cherry pick https://github.com/vllm-project/vllm-ascend/pull/7614

### What this PR does / why we need it?
Fix an incompatibility between fc1 (flashcomm1) and non-sp-padding.

After PR [non-sp-padding](https://github.com/vllm-project/vllm-ascend/pull/7297),
enabling flashcomm1 with kimi2.5 raises an error: "The expanded size of the
tensor does not match the existing size at non-singleton dimension 0."

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.18.0
- vLLM-Ascend main: 9976e685b762e97cc2122eb16c4c8a6af07b46c6

Signed-off-by: Wangbei25
Co-authored-by: Wangbei25
---
 vllm_ascend/worker/model_runner_v1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 677e9925..f414d4f9 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1976,7 +1976,7 @@ class NPUModelRunner(GPUModelRunner):
         _, num_tokens_across_dp, synced_cudagraph_mode = self._sync_batch_across_dp(
             num_tokens_padded=num_tokens_padded,
             cudagraph_mode=cudagraph_mode.value,
-            allow_dp_padding=cudagraph_mode != CUDAGraphMode.NONE,
+            allow_dp_padding=(cudagraph_mode != CUDAGraphMode.NONE) or enable_sp(self.vllm_config),
         )

         # Extract DP padding if there is any