[Feature] Support NPUGraph for DeepSeek on Ascend NPU (#9355)
Co-authored-by: Even Zhou <even.y.zhou@outlook.com>
This commit is contained in:
@@ -918,6 +918,7 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool):
|
||||
layer_num,
|
||||
self.size // self.page_size + 1,
|
||||
self.page_size,
|
||||
1,
|
||||
self.kv_lora_rank,
|
||||
),
|
||||
dtype=self.store_dtype,
|
||||
@@ -928,6 +929,7 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool):
|
||||
layer_num,
|
||||
self.size // self.page_size + 1,
|
||||
self.page_size,
|
||||
1,
|
||||
self.qk_rope_head_dim,
|
||||
),
|
||||
dtype=self.store_dtype,
|
||||
@@ -1000,9 +1002,11 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool):
|
||||
layer_id = layer.layer_id
|
||||
if cache_k.dtype != self.dtype:
|
||||
cache_k = cache_k.to(self.dtype)
|
||||
cache_v = cache_v.to(self.dtype)
|
||||
|
||||
if self.store_dtype != self.dtype:
|
||||
cache_k = cache_k.view(self.store_dtype)
|
||||
cache_v = cache_v.view(self.store_dtype)
|
||||
|
||||
if cache_v is None:
|
||||
cache_k, cache_v = cache_k.split(
|
||||
|
||||
Reference in New Issue
Block a user