### What this PR does / why we need it? Move the torchair-related quantization section into the torchair directory to make the code clearer. As a next step, we'll remove all torchair-related code outside of torchair quantization. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? vLLM version: main vLLM main:ab9f2cfd19 - vLLM version: v0.10.1.1 - vLLM main:959783fb99 Signed-off-by: hust17yixuan <303660421@qq.com>
30 lines
948 B
Python
30 lines
948 B
Python
from vllm_ascend.quantization.quantizer import VLLMAscendQuantizer
|
|
from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
|
|
TorchairAscendW4A8DynamicFusedMoEMethod,
|
|
TorchairAscendW4A8DynamicLinearMethod)
|
|
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
|
|
TorchairAscendW8A8DynamicFusedMoEMethod,
|
|
TorchairAscendW8A8DynamicLinearMethod)
|
|
|
|
|
|
class TorchairW8A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer exposing the Torchair W8A8 dynamic quantization methods.

    Both factories are static and take no arguments; each call constructs
    a new method object with its default constructor.
    """

    @staticmethod
    def build_linear_method() -> TorchairAscendW8A8DynamicLinearMethod:
        """Return a new ``TorchairAscendW8A8DynamicLinearMethod``."""
        return TorchairAscendW8A8DynamicLinearMethod()

    @staticmethod
    def build_moe_method() -> TorchairAscendW8A8DynamicFusedMoEMethod:
        """Return a new ``TorchairAscendW8A8DynamicFusedMoEMethod``."""
        return TorchairAscendW8A8DynamicFusedMoEMethod()
|
|
|
|
|
|
class TorchairW4A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer exposing the Torchair W4A8 dynamic quantization methods.

    Both factories are static and take no arguments; each call constructs
    a new method object with its default constructor.
    """

    @staticmethod
    def build_linear_method() -> TorchairAscendW4A8DynamicLinearMethod:
        """Return a new ``TorchairAscendW4A8DynamicLinearMethod``."""
        return TorchairAscendW4A8DynamicLinearMethod()

    @staticmethod
    def build_moe_method() -> TorchairAscendW4A8DynamicFusedMoEMethod:
        """Return a new ``TorchairAscendW4A8DynamicFusedMoEMethod``."""
        return TorchairAscendW4A8DynamicFusedMoEMethod()
|