diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py index d8ea694c5..b35a1ad1e 100644 --- a/python/sglang/srt/mem_cache/memory_pool.py +++ b/python/sglang/srt/mem_cache/memory_pool.py @@ -326,7 +326,7 @@ class MHATokenToKVPool(KVCache): cache_k = cache_k.view(self.store_dtype) cache_v = cache_v.view(self.store_dtype) - if self.capture_mode: + if self.capture_mode and cache_k.shape[0] < 4: self.alt_stream.wait_stream(torch.cuda.current_stream()) with torch.cuda.stream(self.alt_stream): self.k_buffer[layer_id][loc] = cache_k