From 2101d93b4f2749cfd8a15b70a4c1f7f57b606075 Mon Sep 17 00:00:00 2001
From: Lifu Huang <lifu.hlf@gmail.com>
Date: Mon, 22 Sep 2025 01:09:58 -0700
Subject: [PATCH] Fix CI TestChunkedSGMV (#10737)

---
 python/sglang/utils.py                     |  6 ++++++
 test/srt/lora/test_chunked_sgmv_backend.py | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/python/sglang/utils.py b/python/sglang/utils.py
index 91c3454a1..07d906440 100644
--- a/python/sglang/utils.py
+++ b/python/sglang/utils.py
@@ -621,6 +621,12 @@ class CachedKernel:
 
         return complete_args
 
+    def _clear_cache(self) -> None:
+        """
+        Remove all entries from ``self.kernel_cache``; intended for testing purposes.
+        """
+        self.kernel_cache.clear()
+
 
 def cached_triton_kernel(key_fn=None):
     """
diff --git a/test/srt/lora/test_chunked_sgmv_backend.py b/test/srt/lora/test_chunked_sgmv_backend.py
index 6df369f81..2cfde12db 100644
--- a/test/srt/lora/test_chunked_sgmv_backend.py
+++ b/test/srt/lora/test_chunked_sgmv_backend.py
@@ -10,11 +10,18 @@ from sglang.srt.lora.triton_ops import (
     chunked_sgmv_lora_expand_forward,
     chunked_sgmv_lora_shrink_forward,
 )
+from sglang.srt.lora.triton_ops.chunked_sgmv_expand import _chunked_lora_expand_kernel
+from sglang.srt.lora.triton_ops.chunked_sgmv_shrink import _chunked_lora_shrink_kernel
 from sglang.srt.lora.utils import LoRABatchInfo
 
 CHUNK_SIZE = 16
 
 
+def reset_kernel_cache() -> None:  # Clear the caches of the shrink and expand kernels.
+    _chunked_lora_shrink_kernel._clear_cache()
+    _chunked_lora_expand_kernel._clear_cache()
+
+
 def safe_matmul(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
     """Matrix multiplication with mixed precision handling for float16"""
     result = torch.matmul(a.float(), b.float())
@@ -436,6 +443,10 @@ class TestChunkedSGMV(unittest.TestCase):
         List[str],
     ]:
         """Create test batch with specified composition and mode"""
+
+        # Reset kernel cache to avoid cross-test contamination
+        reset_kernel_cache()
+
         seq_lengths = self.generate_sequence_lengths(
             batch_size, batch_mode, 1, self.max_seq_len
         )