Fix DeepSeek chunked prefill memory issue (#11149)
This commit is contained in:
@@ -1965,6 +1965,7 @@ class DeepseekV2AttentionMLA(nn.Module):
|
|||||||
tmp_lse = torch.empty_like(accum_lse)
|
tmp_lse = torch.empty_like(accum_lse)
|
||||||
merge_state_v2(output, lse, accum_output, accum_lse, tmp_output, tmp_lse)
|
merge_state_v2(output, lse, accum_output, accum_lse, tmp_output, tmp_lse)
|
||||||
accum_output, accum_lse = tmp_output, tmp_lse
|
accum_output, accum_lse = tmp_output, tmp_lse
|
||||||
|
del kv, k, v, output, lse, tmp_output, tmp_lse
|
||||||
|
|
||||||
return accum_output
|
return accum_output
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user