From 7153d8890b91b7807e817acc2a32fffbbe41e2fc Mon Sep 17 00:00:00 2001
From: Jade Zheng <zheng.shoujian@outlook.com>
Date: Fri, 23 May 2025 10:15:29 +0800
Subject: [PATCH] [Feature] Impl v1 disaggregated prefill in ascend scheduler
 (#852)

Implement KV cache save logic for v1 disaggregated prefill in the Ascend
scheduler

This PR adds support for saving the KV cache in the Ascend scheduler,
which is part of the v1 disaggregated prefill design. The load
functionality is not yet implemented; AscendScheduler raises a
ValueError when it is configured as a KV consumer (decode node).

Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com>
---
 vllm_ascend/core/scheduler.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vllm_ascend/core/scheduler.py b/vllm_ascend/core/scheduler.py
index 9ae1735..122f7b9 100644
--- a/vllm_ascend/core/scheduler.py
+++ b/vllm_ascend/core/scheduler.py
@@ -51,6 +51,11 @@ class AscendScheduler(Scheduler):
         self.scheduled_req_ids: set[str] = set()
         self.running: list[Request] = []
 
+        if self.vllm_config.kv_transfer_config is not None and \
+            self.vllm_config.kv_transfer_config.is_kv_consumer:
+            raise ValueError(
+                "AscendScheduler cannot be used for decode nodes. ")
+
     def schedule(self) -> SchedulerOutput:
         if self.scheduler_config.chunked_prefill_enabled:
             return super().schedule()
@@ -287,6 +292,14 @@ class AscendScheduler(Scheduler):
             grammar_bitmask=None,
         )
 
+        # NOTE(Kuntai): this function is designed for multiple purposes:
+        # 1. Plan the KV cache store
+        # 2. Wrap up all the KV cache load / save ops into an opaque object
+        # 3. Clear the internal states of the connector
+        if self.connector is not None:
+            meta = self.connector.build_connector_meta(scheduler_output)
+            scheduler_output.kv_connector_metadata = meta
+
         # Advance the number of computed tokens for the request AFTER
         # the request is scheduled.
         # 1. The scheduler_output of the current step has to include the