feat: add mtp ut and fix some bugs (#2453)
### What this PR does / why we need it?
Fixes the MTP (multi-token prediction) mode unit tests.
### Does this PR introduce _any_ user-facing change?
No user-facing change.
### How was this patch tested?
Verified by running the added/updated unit tests.
- vLLM version: v0.10.0
- vLLM main:
53415653ff
Signed-off-by: 赵江江 <zhaojiangjiang1@h-partners.com>
Co-authored-by: 赵江江 <zhaojiangjiang1@h-partners.com>
This commit is contained in:
@@ -1178,7 +1178,7 @@ class AscendFusedMoE(FusedMoE):
         if self.scoring_func != "softmax" and not self.use_grouped_topk:
             raise ValueError("Only softmax scoring function is supported for "
                              "non-grouped topk.")
-        self.moe = FusedMoEConfig.make(
+        moe = FusedMoEConfig.make(
             num_experts=self.global_num_experts,
             experts_per_token=top_k,
             hidden_dim=hidden_size,
@@ -1188,8 +1188,10 @@ class AscendFusedMoE(FusedMoE):
             in_dtype=params_dtype,
             quant_config=quant_config)
 
+        self.moe_config = moe
+
         if quant_config is None:
-            self.quant_method = AscendUnquantizedFusedMoEMethod(self.moe)
+            self.quant_method = AscendUnquantizedFusedMoEMethod(moe)
         else:
             self.quant_method = quant_config.get_quant_method(self, prefix)
 
Reference in New Issue
Block a user