[Nightly][BugFix] Remove kv_cache nz test case for test_mla_preprocess_nq.py (#6505)
### What this PR does / why we need it?
Remove the kv_cache nz test case from test_mla_preprocess_nq.py. This case was added by https://github.com/vllm-project/vllm-ascend/pull/3072 but had not been tested in the bf16 scenario. Results show that this case is not currently supported.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed with the existing tests.

- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0

Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
@@ -1,6 +1,5 @@
|
|||||||
import gc
|
import gc
|
||||||
|
|
||||||
import pytest
|
|
||||||
import torch
|
import torch
|
||||||
import torch_npu
|
import torch_npu
|
||||||
|
|
||||||
@@ -9,9 +8,8 @@ from vllm_ascend.utils import enable_custom_op
|
|||||||
enable_custom_op()
|
enable_custom_op()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("cache_mode", ["krope_ctkv", "nzcache"])
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
def test_mla_preprocess_kernel(cache_mode: str):
|
def test_mla_preprocess_kernel():
|
||||||
token_num = 1
|
token_num = 1
|
||||||
head_num = 2
|
head_num = 2
|
||||||
N_7168 = 7168
|
N_7168 = 7168
|
||||||
@@ -84,7 +82,7 @@ def test_mla_preprocess_kernel(cache_mode: str):
|
|||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
cache_mode=cache_mode,
|
cache_mode="krope_ctkv",
|
||||||
quant_mode="no_quant",
|
quant_mode="no_quant",
|
||||||
enable_inner_out=False,
|
enable_inner_out=False,
|
||||||
q_out0=q_nope_out,
|
q_out0=q_nope_out,
|
||||||
|
|||||||
Reference in New Issue
Block a user