From 4d6444d5fdf08f39451378ca508d6bcf133cab75 Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Tue, 3 Feb 2026 18:26:51 +0800 Subject: [PATCH] [Nightly][BugFix] Remove kv_cache nz test case for test_mla_preprocess_nq.py (#6505) ### What this PR does / why we need it? Remove the kv_cache nz test case from test_mla_preprocess_nq.py. This case was added by https://github.com/vllm-project/vllm-ascend/pull/3072 but was never tested in the bf16 scenario. Test results show that this combination is not currently supported. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed with the existing test. - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0 Signed-off-by: whx-sjtu <2952154980@qq.com> --- .../ops/singlecard_ops/test_mla_preprocess_nq.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py b/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py index 196ffafc..b18c63f6 100644 --- a/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py +++ b/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py @@ -1,6 +1,5 @@ import gc -import pytest import torch import torch_npu @@ -9,9 +8,8 @@ from vllm_ascend.utils import enable_custom_op enable_custom_op() -@pytest.mark.parametrize("cache_mode", ["krope_ctkv", "nzcache"]) @torch.inference_mode() -def test_mla_preprocess_kernel(cache_mode: str): +def test_mla_preprocess_kernel(): token_num = 1 head_num = 2 N_7168 = 7168 @@ -84,7 +82,7 @@ def test_mla_preprocess_kernel(cache_mode: str): None, None, None, - cache_mode=cache_mode, + cache_mode="krope_ctkv", quant_mode="no_quant", enable_inner_out=False, q_out0=q_nope_out,