Files
enginex-ascend-910-vllm/vllm_ascend/torchair/quantization/torchair_quantizer.py
2025-09-09 09:40:35 +08:00

30 lines
948 B
Python

from vllm_ascend.quantization.quantizer import VLLMAscendQuantizer
from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
TorchairAscendW4A8DynamicFusedMoEMethod,
TorchairAscendW4A8DynamicLinearMethod)
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
TorchairAscendW8A8DynamicFusedMoEMethod,
TorchairAscendW8A8DynamicLinearMethod)
class TorchairW8A8DYNAMICQuantizer(VLLMAscendQuantizer):
@staticmethod
def build_linear_method():
return TorchairAscendW8A8DynamicLinearMethod()
@staticmethod
def build_moe_method():
return TorchairAscendW8A8DynamicFusedMoEMethod()
class TorchairW4A8DYNAMICQuantizer(VLLMAscendQuantizer):
@staticmethod
def build_linear_method():
return TorchairAscendW4A8DynamicLinearMethod()
@staticmethod
def build_moe_method():
return TorchairAscendW4A8DynamicFusedMoEMethod()