From 99bf25af76a9f6759e262e351c7d41fed57f159f Mon Sep 17 00:00:00 2001 From: yiz-liu <136800916+yiz-liu@users.noreply.github.com> Date: Mon, 25 Aug 2025 19:56:02 +0800 Subject: [PATCH] [Fix] Add operations in `_dummy_run` to maintain synchronization with `_process_reqs`, resolving a service hang (#2454) ### What this PR does / why we need it? Fixes hang when batch size < DP size. ### Does this PR introduce _any_ user-facing change? None. ### How was this patch tested? After this change, the function now works correctly in the DP case. - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/d9a55204bad7bde505624f4ffb0464d98c86914a Signed-off-by: Yizhou Liu --- vllm_ascend/worker/model_runner_v1.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 82b4996..3df2613 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1911,6 +1911,10 @@ class NPUModelRunner(LoRAModelRunnerMixin): ) # Padding for DP + num_pad, num_tokens_across_dp_native = self.get_dp_padding(num_tokens) + # num_tokens += num_pad ## Uncomment this after TorchAir is removed + + # Padding for DP (for TorchAir) (num_tokens, num_tokens_across_dp, with_prefill, _) = self._get_forward_metadata_across_dp_and_pad( num_tokens, with_prefill, False)