From aadc75c247924ab8e90c3d82f0ccabcc48cf90ab Mon Sep 17 00:00:00 2001 From: yiz-liu <136800916+yiz-liu@users.noreply.github.com> Date: Fri, 29 Aug 2025 16:06:49 +0800 Subject: [PATCH] [Fix] Resolve data-parallel (DP) assertion errors in TorchAir (#2626) ### What this PR does / why we need it? It is confirmed that `num_input_tokens` must be assigned the value of `maybe_padded_num_tokens` under all circumstances. ### Does this PR introduce _any_ user-facing change? None. ### How was this patch tested? Waiting for daily test for TorchAir. - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/006477e60b49babfca96352c7c648f10fff4a053 Signed-off-by: Yizhou Liu --- vllm_ascend/torchair/torchair_model_runner.py | 2 +- vllm_ascend/worker/model_runner_v1.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm_ascend/torchair/torchair_model_runner.py b/vllm_ascend/torchair/torchair_model_runner.py index 24fd33a..f371c7d 100644 --- a/vllm_ascend/torchair/torchair_model_runner.py +++ b/vllm_ascend/torchair/torchair_model_runner.py @@ -100,7 +100,7 @@ class NPUTorchairModelRunner(NPUModelRunner): num_tokens_across_dp = torch.full((self.dp_size, ), maybe_padded_num_tokens, dtype=torch.int32, - device="cpu") + device="npu") else: maybe_padded_num_tokens = num_tokens diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 15effb7..007a1c5 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1095,9 +1095,9 @@ class NPUModelRunner(LoRAModelRunnerMixin): enable_dbo) = self._sync_metadata_across_dp(num_input_tokens, with_prefill, enable_dbo) - if self.use_aclgraph: - # When using TorchAir with DP, we have other plans for padding - num_input_tokens = maybe_padded_num_tokens + # TODO: Now that num_input_tokens is basically identical with maybe_padded_num_tokens + # We should consider removing maybe_padded_num_tokens later + num_input_tokens = maybe_padded_num_tokens # Hot-Swap lora model if self.lora_config: