From 386a85eccc9a2247180faad2f8a00ad9c0afa4b1 Mon Sep 17 00:00:00 2001 From: wujinyuan1 Date: Tue, 25 Nov 2025 09:32:22 +0800 Subject: [PATCH] [Bugfix] Fix the hang issue of the multimodal model when running with DP>1 (#4393) ### What this PR does / why we need it? When cudagraph_mode is set to FULL_DECODE_ONLY and DP > 1, the dummy-run process is triggered. When calling the update_attn_params function, the num_tokens parameter needs to be passed, and this value was previously obtained from positions.shape[0]. However, the multimodal model uses mRoPE (Multimodal Rotary Position Embedding), which makes positions a two-dimensional tensor, so positions.shape[0] no longer equals the token count. As a result, the value obtained from positions.shape[0] is incorrect. We solve this problem by passing num_tokens directly instead of positions.shape[0]. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? vLLM version: v0.11.0rc3 vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: wujinyuan1 Co-authored-by: wujinyuan1 --- vllm_ascend/worker/model_runner_v1.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 5ad4340..ebbd608 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -2322,11 +2322,10 @@ class NPUModelRunner(LoRAModelRunnerMixin): if self.vllm_config.model_config.use_mla: # FIXME: Try using `auto_dispatch_capture=True` update_mla_attn_params(self.update_stream, forward_context, - positions.shape[0], - self.speculative_config) + num_tokens, self.speculative_config) else: update_attn_params(self.update_stream, forward_context, - positions.shape[0]) + num_tokens) if self.drafter and self.drafter.name == SpecDcodeType.EAGLE3: hidden_states, _ = hidden_states