[Feature] Support DeepSeek for A5 (#7232)
### What this PR does / why we need it?
Add A5 MLA (multi-head latent attention) operators to support running DeepSeek models on A5 hardware.
- vLLM version: v0.17.0
- vLLM main:
4034c3d32e
Signed-off-by: Li Jiahang <216526138+lijiahang226@users.noreply.github.com>
This commit is contained in:
@@ -171,6 +171,18 @@ class BaseDeviceAdaptor:
|
||||
output_dtype=fallback_output_dtype,
|
||||
)[0]
|
||||
|
||||
@staticmethod
def mla_cache_load(cache_kv_c, cache_k_pe, block_table, context_seq_len_npu, seq_starts, key, value):
    """Gather MLA context KV from the paged cache into contiguous buffers.

    Default (base-device) implementation backed by the ATB
    ``npu_paged_cache_load`` kernel. The latent part (``cache_kv_c``) and
    the rope part (``cache_k_pe``) are gathered according to
    ``block_table`` / ``context_seq_len_npu`` and written into ``key`` and
    ``value`` in place; nothing is returned.

    NOTE(review): exact output layout is defined by the torch_npu ATB
    operator — confirm against the torch_npu documentation.
    """
    # Destination tensors and the per-sequence start offsets are passed
    # by keyword; the cache tensors and paging metadata are positional.
    gather_kwargs = {
        "seq_starts": seq_starts,
        "key": key,
        "value": value,
    }
    torch_npu.atb.npu_paged_cache_load(
        cache_kv_c,
        cache_k_pe,
        block_table,
        context_seq_len_npu,
        **gather_kwargs,
    )
|
||||
|
||||
|
||||
class A5DeviceAdaptor(BaseDeviceAdaptor):
|
||||
@classmethod
|
||||
@@ -375,6 +387,18 @@ class A5DeviceAdaptor(BaseDeviceAdaptor):
|
||||
**gmm2_kwargs,
|
||||
)[0]
|
||||
|
||||
@staticmethod
def mla_cache_load(cache_kv_c, cache_k_pe, block_table, context_seq_len_npu, seq_offset, key, value):
    """Gather MLA context KV from the paged cache into contiguous buffers (A5).

    A5-specific override backed by the ``npu_gather_pa_kv_cache`` kernel
    (the A5 device lacks the ATB paged-cache-load path used by the base
    adaptor). Gathered latent (``cache_kv_c``) and rope (``cache_k_pe``)
    data are written into ``key`` and ``value`` in place; nothing is
    returned.

    NOTE(review): this override takes ``seq_offset`` where the base
    implementation takes ``seq_starts`` — keyword callers must match the
    concrete adaptor; presumably call sites pass this positionally.
    """
    # Keyword arguments mirror the base adaptor's call shape; only the
    # underlying kernel and the offset keyword differ.
    gather_kwargs = {
        "seq_offset": seq_offset,
        "key": key,
        "value": value,
    }
    torch_npu.npu_gather_pa_kv_cache(
        cache_kv_c,
        cache_k_pe,
        block_table,
        context_seq_len_npu,
        **gather_kwargs,
    )
|
||||
|
||||
|
||||
def get_device_adaptor() -> type["BaseDeviceAdaptor"]:
|
||||
ascend_device_type = get_ascend_device_type()
|
||||
|
||||
Reference in New Issue
Block a user