[MOE Refactor] Remove QuantType in prepare_finalize.py (#6534)

### What this PR does / why we need it? To prevent confusion between different QuantType classes, we remove the QuantType enum defined in prepare_finalize.py and import QuantType from vllm_ascend.quantization.methods.base instead. - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0 Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
2026-02-10 15:59:58 +08:00
parent 34eecacace
commit 7cf285a77a
4 changed files with 9 additions and 24 deletions
--- a/vllm_ascend/ops/fused_moe/prepare_finalize.py
+++ b/vllm_ascend/ops/fused_moe/prepare_finalize.py
@@ -15,7 +15,6 @@
 # This file is a part of the vllm-ascend project.

 from abc import ABC, abstractmethod
-from enum import Enum

 import torch
 import torch.distributed as dist
@@ -32,15 +31,10 @@ from vllm.model_executor.layers.fused_moe import FusedMoEConfig

 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.distributed.utils import fc3_all_gather_and_maybe_unpad_impl
+from vllm_ascend.quantization.methods.base import QuantType
 from vllm_ascend.utils import enable_sp, npu_stream_switch, prefill_context_parallel_enable


-class QuantType(Enum):
-    NONE = 0
-    W8A8 = 1
-    W4A8 = 2
-
-
 class PrepareAndFinalize(ABC):
    """
    Abstract base class for MoE (Mixture-of-Experts) tensor preparation and finalization