[MOE Refactor] Remove QuantType in prepare_finalize.py (#6534)

### What this PR does / why we need it? To prevent confusion between the different QuantType classes, we remove the QuantType defined in prepare_finalize.py and use the one from vllm_ascend.quantization.methods.base instead. - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0 Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
2026-02-10 15:59:58 +08:00
parent 34eecacace
commit 7cf285a77a
4 changed files with 9 additions and 24 deletions
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -35,7 +35,7 @@ from vllm_ascend.eplb.core.eplb_utils import init_eplb_config
 from vllm_ascend.flash_common3_context import get_flash_common3_context, set_flash_common3_context
 from vllm_ascend.ops.fused_moe.experts_selector import select_experts, zero_experts_compute
 from vllm_ascend.ops.fused_moe.moe_comm_method import AllGatherCommImpl, FusedExpertsResult, setup_moe_comm_method
-from vllm_ascend.ops.fused_moe.prepare_finalize import QuantType
+from vllm_ascend.quantization.methods.base import QuantType
 from vllm_ascend.utils import (
    enable_sp,
    maybe_trans_nz,
@@ -235,22 +235,13 @@ class AscendFusedMoE(FusedMoE):
        self.quant_type = self._get_quant_type()

    def _get_quant_type(self) -> QuantType:
-        quant_method = self.quant_method
-        if not hasattr(quant_method, "quant_method") or quant_method.quant_method is None:
-            return QuantType.NONE
+        quant_type = QuantType.NONE
+        method = getattr(self.quant_method, "quant_method", None)

-        method = quant_method.quant_method
+        if method is not None:
+            quant_type = getattr(method, "quant_type", QuantType.NONE)

-        if hasattr(method, "quant_type"):
-            from vllm_ascend.quantization.methods.base import QuantType as SchemeQuantType
-
-            scheme_quant_type = method.quant_type
-            if scheme_quant_type == SchemeQuantType.W8A8:
-                return QuantType.W8A8
-            elif scheme_quant_type == SchemeQuantType.W4A8:
-                return QuantType.W4A8
-
-        return QuantType.NONE
+        return quant_type

    def update_expert_map(self, new_expert_map):
        self._expert_map = new_expert_map