[Bugfix] fix kv nz accuracy bug (#2988)
When `enable_kv_nz` is true, the output of DeepSeek-R1 is invalid.
- vLLM version: v0.10.2
- vLLM main: 2b85697031
Signed-off-by: realliujiaxu <realliujiaxu@163.com>
This commit is contained in:
@@ -789,7 +789,7 @@ class AscendMLAImpl(MLAAttentionImpl):
|
||||
# npu_kv_rmsnorm_rope_cache needs [B, N, S, D]
|
||||
kv_no_split = kv_no_split.view(
|
||||
B, N, S, self.kv_lora_rank + self.qk_rope_head_dim)
|
||||
cache_mode = "PA_BLK_NZ" if self.enable_kv_nz else "PA"
|
||||
cache_mode = "PA_NZ" if self.enable_kv_nz else "PA"
|
||||
_, _, k_pe, k_nope = torch_npu.npu_kv_rmsnorm_rope_cache(
|
||||
kv_no_split,
|
||||
self.kv_a_layernorm.weight,
|
||||
|
||||
@@ -1006,7 +1006,7 @@ class AscendMLATorchairImpl(MLAAttentionImpl):
|
||||
kv = self.kv_a_proj_with_mqa(hidden_states)[0]
|
||||
# npu_kv_rmsnorm_rope_cache needs [B, N, S, D]
|
||||
kv = kv.view(B, N, S, self.kv_lora_rank + self.qk_rope_head_dim)
|
||||
cache_mode = "PA_BLK_NZ" if self.enable_kv_nz else "PA"
|
||||
cache_mode = "PA_NZ" if self.enable_kv_nz else "PA"
|
||||
_, _, k_pe, k_nope = torch_npu.npu_kv_rmsnorm_rope_cache(
|
||||
kv,
|
||||
self.kv_a_layernorm.weight,
|
||||
|
||||
Reference in New Issue
Block a user