From 44b1fbe258e0b92812ec9a73ddcdb2efa13e76d4 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Thu, 2 Oct 2025 14:56:59 +0800 Subject: [PATCH] Fix DeepSeek chunked prefill memory issue (#11149) --- python/sglang/srt/models/deepseek_v2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 0db0ca164..73ff4c1c7 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1965,6 +1965,7 @@ class DeepseekV2AttentionMLA(nn.Module): tmp_lse = torch.empty_like(accum_lse) merge_state_v2(output, lse, accum_output, accum_lse, tmp_output, tmp_lse) accum_output, accum_lse = tmp_output, tmp_lse + del kv, k, v, output, lse, tmp_output, tmp_lse return accum_output