From e3b3ffb87556cffb80fcdb93313a4a9575d34324 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Fri, 1 Aug 2025 08:53:00 +0800 Subject: [PATCH] [Misc] Disable quantization in mindie_turbo (#2147) ### What this PR does / why we need it? Cherry-pick #1749 from v0.9.1-dev. Because the interfaces in vllm-ascend have been changing rapidly, the quantization function in mindie_turbo is no longer needed, so it is removed. Co-authored-by: zouyida [zouyida@huawei.com](mailto:zouyida@huawei.com) Co-authored-by: wangli [wangli858794774@gmail.com](mailto:wangli858794774@gmail.com) ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/207b750e194829c4bcd4df0450f5f93d71755dae Signed-off-by: wangli --- vllm_ascend/quantization/quantizer.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/vllm_ascend/quantization/quantizer.py b/vllm_ascend/quantization/quantizer.py index c0d2241..e61593d 100644 --- a/vllm_ascend/quantization/quantizer.py +++ b/vllm_ascend/quantization/quantizer.py @@ -47,14 +47,8 @@ class AscendQuantizer: if quantization_algorithm in CUSTOMIZED_QUANTIZER_TYPE: return - try: - module = importlib.import_module("mindie_turbo") - MindIETurboQuantizer = module.MindIETurboQuantizer - return MindIETurboQuantizer.get_quantizer(quant_config, prefix, - packed_modules_mapping) - except ImportError: - return VLLMAscendQuantizer.get_quantizer(quant_config, prefix, - packed_modules_mapping) + return VLLMAscendQuantizer.get_quantizer(quant_config, prefix, + packed_modules_mapping) def build_linear_method(self): raise NotImplementedError