diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py index cc3faea0a..3b567dc76 100644 --- a/python/sglang/srt/mem_cache/memory_pool.py +++ b/python/sglang/srt/mem_cache/memory_pool.py @@ -951,7 +951,7 @@ class AscendMLAPagedTokenToKVPool(MLATokenToKVPool): cache_k = cache_k.to(self.dtype) if self.store_dtype != self.dtype: - cache_k = cache_k.view(store_dtype) + cache_k = cache_k.view(self.store_dtype) import torch_npu