xc-llm-ascend/vllm_ascend/torchair/quantization/torchair_quantizer.py

from vllm_ascend.quantization.quantizer import VLLMAscendQuantizer
from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
    TorchairAscendW4A8DynamicFusedMoEMethod,
    TorchairAscendW4A8DynamicLinearMethod)
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
    TorchairAscendW8A8DynamicFusedMoEMethod,
    TorchairAscendW8A8DynamicLinearMethod)


class TorchairW8A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer exposing factories for the Torchair W8A8 dynamic methods.

    Both factories are static and take no arguments; each call constructs
    and returns a brand-new method object.
    """

    @staticmethod
    def build_moe_method():
        """Return a new ``TorchairAscendW8A8DynamicFusedMoEMethod``."""
        moe_method = TorchairAscendW8A8DynamicFusedMoEMethod()
        return moe_method

    @staticmethod
    def build_linear_method():
        """Return a new ``TorchairAscendW8A8DynamicLinearMethod``."""
        linear_method = TorchairAscendW8A8DynamicLinearMethod()
        return linear_method


class TorchairW4A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer exposing factories for the Torchair W4A8 dynamic methods.

    Both factories are static and take no arguments; each call constructs
    and returns a brand-new method object.
    """

    @staticmethod
    def build_moe_method():
        """Return a new ``TorchairAscendW4A8DynamicFusedMoEMethod``."""
        moe_method = TorchairAscendW4A8DynamicFusedMoEMethod()
        return moe_method

    @staticmethod
    def build_linear_method():
        """Return a new ``TorchairAscendW4A8DynamicLinearMethod``."""
        linear_method = TorchairAscendW4A8DynamicLinearMethod()
        return linear_method
[3/N][refactor] refactor quantization (#2504) ### What this PR does / why we need it? Move torchair-related quantization section into torchair dir to make the code clear. Next step we'll remove all torchair-related code outside of torchair quantization. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? vLLM version: main vLLM main: https://github.com/vllm-project/vllm/commit/ab9f2cfd1942f7ddfee658ce86ea96b4789862af - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/959783fb996d0d15598f45ca12ffcbee4b681424 Signed-off-by: hust17yixuan <303660421@qq.com> 2025-08-27 10:45:50 +08:00			`from vllm_ascend.quantization.quantizer import VLLMAscendQuantizer`
			`from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (`
			`TorchairAscendW4A8DynamicFusedMoEMethod,`
			`TorchairAscendW4A8DynamicLinearMethod)`
			`from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (`
			`TorchairAscendW8A8DynamicFusedMoEMethod,`
			`TorchairAscendW8A8DynamicLinearMethod)`


			`class TorchairW8A8DYNAMICQuantizer(VLLMAscendQuantizer):`

			`@staticmethod`
			`def build_linear_method():`
			`return TorchairAscendW8A8DynamicLinearMethod()`

			`@staticmethod`
			`def build_moe_method():`
			`return TorchairAscendW8A8DynamicFusedMoEMethod()`


			`class TorchairW4A8DYNAMICQuantizer(VLLMAscendQuantizer):`

			`@staticmethod`
			`def build_linear_method():`
			`return TorchairAscendW4A8DynamicLinearMethod()`

			`@staticmethod`
			`def build_moe_method():`
			`return TorchairAscendW4A8DynamicFusedMoEMethod()`