[1/N][refactor] torchair fused_moe refactor (#2438)

### What this PR does / why we need it? Move the torchair-related fused_moe code into torchair_fused_moe to make the code clearer. As a next step, we'll remove all torchair-related code outside of torchair_fused_moe. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? vLLM version: v0.10.0 vLLM main: 08d5f7113a - vLLM version: v0.10.1.1 - vLLM main: 170e8ea9ea Signed-off-by: hust17yixuan <303660421@qq.com>
2025-08-25 15:46:10 +08:00
parent 334c44613a
commit 0f81e032f0
5 changed files with 1974 additions and 6 deletions
--- a/vllm_ascend/torchair/models/torchair_deepseek_v2.py
+++ b/vllm_ascend/torchair/models/torchair_deepseek_v2.py
@@ -70,9 +70,9 @@ from vllm.model_executor.models.utils import (
 from vllm.sequence import IntermediateTensors

 from vllm_ascend.ascend_config import get_ascend_config
-from vllm_ascend.ops.fused_moe import AscendFusedMoE
 from vllm_ascend.quantization.quant_config import AscendLinearMethod
 from vllm_ascend.quantization.w8a8_dynamic import AscendW8A8DynamicLinearMethod
+from vllm_ascend.torchair.ops.torchair_fused_moe import TorchairAscendFusedMoE
 from vllm_ascend.utils import dispose_tensor, npu_prefetch


@@ -335,7 +335,7 @@ class TorchairDeepseekV2MoE(nn.Module):
        else:
            self.gate.e_score_correction_bias = None

-        self.experts = AscendFusedMoE(
+        self.experts = TorchairAscendFusedMoE(
            num_experts=config.n_routed_experts,
            top_k=config.num_experts_per_tok,
            hidden_size=config.hidden_size,
@@ -951,7 +951,7 @@ class TorchairDeepseekV2ForCausalLM(DeepseekV2ForCausalLM):

        # Params for weights, fp8 weight scales, fp8 activation scales
        # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = AscendFusedMoE.make_expert_params_mapping(
+        expert_params_mapping = TorchairAscendFusedMoE.make_expert_params_mapping(
            ckpt_gate_proj_name="gate_proj",
            ckpt_down_proj_name="down_proj",
            ckpt_up_proj_name="up_proj",