From 2bb7e55022c3a558145a1b17ba3c93b4ab6bf00f Mon Sep 17 00:00:00 2001
From: CaveNightingale <51874645+CaveNightingale@users.noreply.github.com>
Date: Fri, 15 Aug 2025 16:59:52 +0800
Subject: [PATCH] [Bugfix][PD]fix non-working disaggregated prefill (#2374)

### What this PR does / why we need it?

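Mainline vLLM fixed disaggregated prefill in
https://github.com/vllm-project/vllm/pull/22598, but it still does not
work in vllm-ascend.
Concretely, on Ascend devices the decoder instances crash before vLLM's
fix and hang after it.
This patch makes disaggregated prefill work on vllm-ascend: a
`KVConnectorOutput` is now built when either `finished_sending` or
`finished_recving` is set (previously both had to be set), and the
copied empty model runner output reports them through
`kv_connector_output` instead of as separate attributes.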

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tested with Qwen3-0.6B in a 1P1D setup (one prefill instance and one
decode instance), tp=1, dp=1.


- vLLM version: v0.10.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/0fe85087a950f3ca94d60293c865c0e6c05e6eff

---------

Signed-off-by: CaveNightingale <cavenightingale@foxmail.com>
---
 vllm_ascend/worker/model_runner_v1.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 14f89bb..ebf76eb 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1636,7 +1636,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
              finished_recving) = (self._process_reqs(scheduler_output,
                                                      intermediate_tensors))
         kv_connector_output = None
-        if finished_sending is not None and finished_recving is not None:
+        if finished_sending is not None or finished_recving is not None:
             kv_connector_output = KVConnectorOutput(
                 finished_sending=finished_sending,
                 finished_recving=finished_recving)
@@ -1838,8 +1838,9 @@ class NPUModelRunner(LoRAModelRunnerMixin):
             return EMPTY_MODEL_RUNNER_OUTPUT
 
         output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
-        output.finished_sending = finished_sending
-        output.finished_recving = finished_recving
+        output.kv_connector_output = KVConnectorOutput(
+            finished_sending=finished_sending,
+            finished_recving=finished_recving)
         return output
 
     @staticmethod