[1/N][refactor] torchair fused_moe refactor (#2438)

### What this PR does / why we need it?
Move the torchair-related fused_moe code into torchair_fused_moe to make
the code clearer. As a next step, we'll remove all torchair-related code
outside of torchair_fused_moe.

### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
vLLM version: v0.10.0
vLLM main:
08d5f7113a

- vLLM version: v0.10.1.1
- vLLM main:
170e8ea9ea

Signed-off-by: hust17yixuan <303660421@qq.com>
This commit is contained in:
Wang Yixuan
2025-08-25 15:46:10 +08:00
committed by GitHub
parent 334c44613a
commit 0f81e032f0
5 changed files with 1974 additions and 6 deletions

View File

@@ -70,9 +70,9 @@ from vllm.model_executor.models.utils import (
from vllm.sequence import IntermediateTensors
from vllm_ascend.ascend_config import get_ascend_config
from vllm_ascend.ops.fused_moe import AscendFusedMoE
from vllm_ascend.quantization.quant_config import AscendLinearMethod
from vllm_ascend.quantization.w8a8_dynamic import AscendW8A8DynamicLinearMethod
from vllm_ascend.torchair.ops.torchair_fused_moe import TorchairAscendFusedMoE
from vllm_ascend.utils import dispose_tensor, npu_prefetch
@@ -335,7 +335,7 @@ class TorchairDeepseekV2MoE(nn.Module):
else:
self.gate.e_score_correction_bias = None
self.experts = AscendFusedMoE(
self.experts = TorchairAscendFusedMoE(
num_experts=config.n_routed_experts,
top_k=config.num_experts_per_tok,
hidden_size=config.hidden_size,
@@ -951,7 +951,7 @@ class TorchairDeepseekV2ForCausalLM(DeepseekV2ForCausalLM):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping = AscendFusedMoE.make_expert_params_mapping(
expert_params_mapping = TorchairAscendFusedMoE.make_expert_params_mapping(
ckpt_gate_proj_name="gate_proj",
ckpt_down_proj_name="down_proj",
ckpt_up_proj_name="up_proj",