[Feature] Support NPUGraph for DeepSeek on Ascend NPU (#9355)

Co-authored-by: Even Zhou <even.y.zhou@outlook.com>
This commit is contained in:
chenxu140
2025-08-29 07:06:24 +08:00
committed by GitHub
parent dc20c22f76
commit 74dd4249ac
7 changed files with 307 additions and 105 deletions

View File

@@ -918,6 +918,7 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool):
layer_num,
self.size // self.page_size + 1,
self.page_size,
1,
self.kv_lora_rank,
),
dtype=self.store_dtype,
@@ -928,6 +929,7 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool):
layer_num,
self.size // self.page_size + 1,
self.page_size,
1,
self.qk_rope_head_dim,
),
dtype=self.store_dtype,
@@ -1000,9 +1002,11 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool):
layer_id = layer.layer_id
if cache_k.dtype != self.dtype:
cache_k = cache_k.to(self.dtype)
cache_v = cache_v.to(self.dtype)
if self.store_dtype != self.dtype:
cache_k = cache_k.view(self.store_dtype)
cache_v = cache_v.view(self.store_dtype)
if cache_v is None:
cache_k, cache_v = cache_k.split(