diff --git a/tests/ut/quantization/test_quant_config.py b/tests/ut/quantization/test_quant_config.py index b667767b..b0dc73db 100644 --- a/tests/ut/quantization/test_quant_config.py +++ b/tests/ut/quantization/test_quant_config.py @@ -66,11 +66,19 @@ class TestAscendQuantConfig(TestBase): mock_is_available.return_value = True result = AscendQuantConfig.override_quantization_method(None, None) self.assertIsNone(result) + hf_quant_cfg = {"quant_method": ""} + result = AscendQuantConfig.override_quantization_method( + hf_quant_cfg, None) + self.assertEqual(result, "ascend") # Test when NPU is not available mock_is_available.return_value = False result = AscendQuantConfig.override_quantization_method(None, None) self.assertIsNone(result) + hf_quant_cfg = {"quant_method": ""} + result = AscendQuantConfig.override_quantization_method( + hf_quant_cfg, None) + self.assertIsNone(result) def test_get_quant_method_for_linear(self): mock_config = MagicMock() diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index e9d0c97f..e19a008b 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -96,7 +96,7 @@ class AscendQuantConfig(QuantizationConfig): user_quant) -> Optional[str]: if hf_quant_cfg is not None: quant_method = hf_quant_cfg.get("quant_method", None) - if quant_method is None and torch.npu.is_available(): + if not quant_method and torch.npu.is_available(): return ASCEND_QUANTIZATION_METHOD return None