From 807686dec9a45668286234a2842fdc5209756bb9 Mon Sep 17 00:00:00 2001
From: NeverRaR <44917563+NeverRaR@users.noreply.github.com>
Date: Thu, 23 Oct 2025 15:52:17 +0800
Subject: [PATCH] perf: optimize memory for deepseek mtp (#2713)

### What this PR does / why we need it?
Delete the temporary tensors to optimize memory for DeepSeek MTP in the
torchair case.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: boying <897013703@qq.com>
---
 vllm_ascend/torchair/models/torchair_deepseek_mtp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm_ascend/torchair/models/torchair_deepseek_mtp.py b/vllm_ascend/torchair/models/torchair_deepseek_mtp.py
index c8503e33..2285bb1e 100644
--- a/vllm_ascend/torchair/models/torchair_deepseek_mtp.py
+++ b/vllm_ascend/torchair/models/torchair_deepseek_mtp.py
@@ -102,6 +102,7 @@ class TorchairDeepSeekMultiTokenPredictorLayer(DeepSeekMultiTokenPredictorLayer
         hidden_states = self.eh_proj(
             torch.cat([inputs_embeds, previous_hidden_states], dim=-1))
+        del inputs_embeds, previous_hidden_states
         replace_allreduce = hidden_states.shape[0] % self.tp_size == 0

         hidden_states, residual = self.mtp_block(