From 4d6444d5fdf08f39451378ca508d6bcf133cab75 Mon Sep 17 00:00:00 2001
From: whx <56632993+whx-sjtu@users.noreply.github.com>
Date: Tue, 3 Feb 2026 18:26:51 +0800
Subject: [PATCH] [Nightly][BugFix] Remove kv_cache nz test case for
 test_mla_preprocess_nq.py (#6505)

### What this PR does / why we need it?
Remove the kv_cache nz test case (cache_mode="nzcache") from
test_mla_preprocess_nq.py. This case was added by
https://github.com/vllm-project/vllm-ascend/pull/3072 but was never
tested in the bf16 scenario. Results show that it is not currently
supported, so the test now runs only with cache_mode="krope_ctkv".

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed with the existing test (the remaining krope_ctkv case).


- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0

Signed-off-by: whx-sjtu <2952154980@qq.com>
---
 .../ops/singlecard_ops/test_mla_preprocess_nq.py            | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py b/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py
index 196ffafc..b18c63f6 100644
--- a/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py
+++ b/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py
@@ -1,6 +1,5 @@
 import gc
 
-import pytest
 import torch
 import torch_npu
 
@@ -9,9 +8,8 @@ from vllm_ascend.utils import enable_custom_op
 enable_custom_op()
 
 
-@pytest.mark.parametrize("cache_mode", ["krope_ctkv", "nzcache"])
 @torch.inference_mode()
-def test_mla_preprocess_kernel(cache_mode: str):
+def test_mla_preprocess_kernel():
     token_num = 1
     head_num = 2
     N_7168 = 7168
@@ -84,7 +82,7 @@ def test_mla_preprocess_kernel(cache_mode: str):
         None,
         None,
         None,
-        cache_mode=cache_mode,
+        cache_mode="krope_ctkv",
         quant_mode="no_quant",
         enable_inner_out=False,
         q_out0=q_nope_out,