From aadc75c247924ab8e90c3d82f0ccabcc48cf90ab Mon Sep 17 00:00:00 2001 From: yiz-liu <136800916+yiz-liu@users.noreply.github.com> Date: Fri, 29 Aug 2025 16:06:49 +0800 Subject: [PATCH] [Fix] Resolve data-parallel (DP) assertion errors in TorchAir (#2626) ### What this PR does / why we need it? It is confirmed that `num_input_tokens` must be assigned the value of `maybe_padded_num_tokens` under all circumstances. ### Does this PR introduce _any_ user-facing change? None. ### How was this patch tested? Waiting for daily test for TorchAir. - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/006477e60b49babfca96352c7c648f10fff4a053 Signed-off-by: Yizhou Liu --- vllm_ascend/torchair/torchair_model_runner.py | 2 +- vllm_ascend/worker/model_runner_v1.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm_ascend/torchair/torchair_model_runner.py b/vllm_ascend/torchair/torchair_model_runner.py index 24fd33a..f371c7d 100644 --- a/vllm_ascend/torchair/torchair_model_runner.py +++ b/vllm_ascend/torchair/torchair_model_runner.py @@ -100,7 +100,7 @@ class NPUTorchairModelRunner(NPUModelRunner): num_tokens_across_dp = torch.full((self.dp_size, ), maybe_padded_num_tokens, dtype=torch.int32, - device="cpu") + device="npu") else: maybe_padded_num_tokens = num_tokens diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 15effb7..007a1c5 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1095,9 +1095,9 @@ class NPUModelRunner(LoRAModelRunnerMixin): enable_dbo) = self._sync_metadata_across_dp(num_input_tokens, with_prefill, enable_dbo) - if self.use_aclgraph: - # When using TorchAir with DP, we have other plans for padding - num_input_tokens = maybe_padded_num_tokens + # TODO: Now that num_input_tokens is basically identical with maybe_padded_num_tokens + # We should consider removing maybe_padded_num_tokens later + num_input_tokens = maybe_padded_num_tokens # Hot-Swap lora model if self.lora_config: