Revert "[MoE] [Refactor] Remove manual memory cleanup (#3365)" (#3483)

This reverts commit 4f937f561d.

### What this PR does / why we need it?
This reverts commit 4f937f561d.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
e2e & ut

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com>
2025-10-15 22:25:46 +08:00
parent f69a83b7ba
commit cec1fab509
8 changed files with 500 additions and 572 deletions
--- a/vllm_ascend/ops/common_fused_moe.py
+++ b/vllm_ascend/ops/common_fused_moe.py
@@ -301,7 +301,7 @@ class AscendFusedMoE(FusedMoE):
        enable_force_load_balance = forward_context.in_profile_run

        forward_context = get_forward_context()
-        hidden_states, router_logits, mc2_mask, context_metadata = forward_context.moe_comm_method.prepare(
+        hidden_states, router_logits = forward_context.moe_comm_method.prepare(
            hidden_states=hidden_states,
            router_logits=router_logits,
            replace_allreduce=forward_context.sp_enabled,
@@ -329,8 +329,7 @@ class AscendFusedMoE(FusedMoE):
            shared_experts=None,
            enable_force_load_balance=enable_force_load_balance,
            log2phy=self.log2phy,
-            global_redundant_expert_num=self.global_redundant_expert_num,
-            mc2_mask=mc2_mask)
+            global_redundant_expert_num=self.global_redundant_expert_num)

        if isinstance(final_hidden_states, tuple):
            final_hidden_states, group_list_type, expert_tokens = final_hidden_states
@@ -341,8 +340,7 @@ class AscendFusedMoE(FusedMoE):

        final_hidden_states = forward_context.moe_comm_method.finalize(
            hidden_states=final_hidden_states,
-            reduce_results=self.reduce_results,
-            context_metadata=context_metadata)
+            reduce_results=self.reduce_results)

        return final_hidden_states