From 66d5d0425c81687309872c73af8268c5fd43f047 Mon Sep 17 00:00:00 2001
From: Elfie Guo <164945471+elfiegg@users.noreply.github.com>
Date: Wed, 3 Sep 2025 16:52:07 -0700
Subject: [PATCH] Minor update regarding issue #9704 (#9733)

---
 python/sglang/srt/models/deepseek_v2.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index bceb60cfe..147925f88 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1678,9 +1678,11 @@ class DeepseekV2AttentionMLA(nn.Module):
             latent_cache_buf = forward_batch.token_to_kv_pool.get_key_buffer(
                 self.attn_mha.layer_id
             )
-            latent_cache = latent_cache_buf[
-                forward_batch.prefix_chunk_kv_indices[i]
-            ].contiguous()
+            latent_cache = (
+                latent_cache_buf[forward_batch.prefix_chunk_kv_indices[i]]
+                .contiguous()
+                .to(q.dtype)
+            )
             kv_a_normed, k_pe = latent_cache.split(
                 [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1