[3/N][refactor] refactor quantization (#2504)
### What this PR does / why we need it? Move the torchair-related quantization code into the torchair directory to make the code clearer. As a next step, we'll remove all torchair-related code that lives outside of torchair quantization. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - vLLM version: main - vLLM main: ab9f2cfd19 - vLLM version: v0.10.1.1 - vLLM main: 959783fb99 Signed-off-by: hust17yixuan <303660421@qq.com>
This commit is contained in:
29
vllm_ascend/torchair/quantization/torchair_quantizer.py
Normal file
29
vllm_ascend/torchair/quantization/torchair_quantizer.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from vllm_ascend.quantization.quantizer import VLLMAscendQuantizer
|
||||
from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
|
||||
TorchairAscendW4A8DynamicFusedMoEMethod,
|
||||
TorchairAscendW4A8DynamicLinearMethod)
|
||||
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
|
||||
TorchairAscendW8A8DynamicFusedMoEMethod,
|
||||
TorchairAscendW8A8DynamicLinearMethod)
|
||||
|
||||
|
||||
class TorchairW8A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer that hands out the torchair W8A8 dynamic method objects.

    Both builders are static and take no arguments; each call constructs
    a new method object with its default constructor.
    """

    # NOTE(review): presumably these override same-named hooks on
    # VLLMAscendQuantizer -- confirm against the base class.

    @staticmethod
    def build_linear_method():
        """Return a new ``TorchairAscendW8A8DynamicLinearMethod``."""
        return TorchairAscendW8A8DynamicLinearMethod()

    @staticmethod
    def build_moe_method():
        """Return a new ``TorchairAscendW8A8DynamicFusedMoEMethod``."""
        return TorchairAscendW8A8DynamicFusedMoEMethod()
|
||||
|
||||
|
||||
class TorchairW4A8DYNAMICQuantizer(VLLMAscendQuantizer):
    """Quantizer that hands out the torchair W4A8 dynamic method objects.

    Both builders are static and take no arguments; each call constructs
    a new method object with its default constructor.
    """

    # NOTE(review): presumably these override same-named hooks on
    # VLLMAscendQuantizer -- confirm against the base class.

    @staticmethod
    def build_linear_method():
        """Return a new ``TorchairAscendW4A8DynamicLinearMethod``."""
        return TorchairAscendW4A8DynamicLinearMethod()

    @staticmethod
    def build_moe_method():
        """Return a new ``TorchairAscendW4A8DynamicFusedMoEMethod``."""
        return TorchairAscendW4A8DynamicFusedMoEMethod()
|
||||
Reference in New Issue
Block a user