diff --git a/tests/ut/quantization/test_quant_config.py b/tests/ut/quantization/test_quant_config.py index 55c72c6..7529fea 100644 --- a/tests/ut/quantization/test_quant_config.py +++ b/tests/ut/quantization/test_quant_config.py @@ -10,8 +10,7 @@ from vllm.model_executor.layers.linear import (LinearBase, from tests.ut.base import TestBase from vllm_ascend.quantization.quant_config import (AscendKVCacheMethod, AscendQuantConfig) - -ASCEND_QUATIZATION_METHOD = "ascend" +from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD class TestAscendQuantConfig(TestBase): @@ -42,7 +41,7 @@ class TestAscendQuantConfig(TestBase): def test_get_name(self): self.assertEqual(AscendQuantConfig.get_name(), - ASCEND_QUATIZATION_METHOD) + ASCEND_QUANTIZATION_METHOD) def test_get_supported_act_dtypes(self): supported_dtypes = AscendQuantConfig.get_supported_act_dtypes() @@ -66,7 +65,7 @@ class TestAscendQuantConfig(TestBase): # Test when NPU is available mock_is_available.return_value = True result = AscendQuantConfig.override_quantization_method(None, None) - self.assertEqual(result, ASCEND_QUATIZATION_METHOD) + self.assertEqual(result, ASCEND_QUANTIZATION_METHOD) # Test when NPU is not available mock_is_available.return_value = False diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py index 650cc47..61efd00 100644 --- a/tests/ut/test_platform.py +++ b/tests/ut/test_platform.py @@ -12,7 +12,7 @@ from vllm.platforms import PlatformEnum from tests.ut.base import TestBase from vllm_ascend.platform import NPUPlatform -from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD +from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD class TestNPUPlatform(TestBase): @@ -43,7 +43,7 @@ class TestNPUPlatform(TestBase): "ASCEND_RT_VISIBLE_DEVICES") self.assertEqual(NPUPlatform.dispatch_key, "PrivateUse1") self.assertEqual(NPUPlatform.supported_quantization, - [ASCEND_QUATIZATION_METHOD]) + [ASCEND_QUANTIZATION_METHOD]) def test_is_sleep_mode_available(self): self.assertTrue(self.platform.is_sleep_mode_available()) @@ -61,7 +61,7 @@ class TestNPUPlatform(TestBase): mock_adapt_patch.assert_called_once_with(is_global_patch=True) - self.assertTrue(ASCEND_QUATIZATION_METHOD in mock_action.choices) + self.assertTrue(ASCEND_QUANTIZATION_METHOD in mock_action.choices) self.assertEqual(len(mock_action.choices), 3) # original 2 + ascend @patch("vllm_ascend.utils.adapt_patch") @@ -89,7 +89,7 @@ class TestNPUPlatform(TestBase): self, mock_quant_config, mock_adapt_patch): mock_parser = MagicMock() mock_action = MagicMock() - mock_action.choices = ["awq", ASCEND_QUATIZATION_METHOD] + mock_action.choices = ["awq", ASCEND_QUANTIZATION_METHOD] mock_parser._option_string_actions = {"--quantization": mock_action} self.platform.pre_register_and_update(mock_parser) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 6075a79..2808768 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -28,7 +28,7 @@ from vllm.platforms import Platform, PlatformEnum from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config, init_ascend_config) -from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, is_310p, +from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD, is_310p, update_aclgraph_sizes) if TYPE_CHECKING: @@ -50,7 +50,7 @@ class NPUPlatform(Platform): device_control_env_var: str = "ASCEND_RT_VISIBLE_DEVICES" dispatch_key: str = "PrivateUse1" - supported_quantization: list[str] = [ASCEND_QUATIZATION_METHOD] + supported_quantization: list[str] = [ASCEND_QUANTIZATION_METHOD] def is_sleep_mode_available(self) -> bool: return True @@ -70,8 +70,8 @@ class NPUPlatform(Platform): quant_action = parser._option_string_actions.get('--quantization') if quant_action and hasattr(quant_action, 'choices') and quant_action.choices: - if ASCEND_QUATIZATION_METHOD not in quant_action.choices: - quant_action.choices.append(ASCEND_QUATIZATION_METHOD) + if ASCEND_QUANTIZATION_METHOD not in quant_action.choices: + quant_action.choices.append(ASCEND_QUANTIZATION_METHOD) from vllm_ascend.quantization.quant_config import \ AscendQuantConfig # noqa: F401 diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index 65f682d..d449c8d 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -36,12 +36,12 @@ from vllm.model_executor.parameter import PerTensorScaleParameter from vllm.model_executor.utils import set_weight_attrs from vllm_ascend.ops.fused_moe import AscendUnquantizedFusedMoEMethod -from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD +from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD from .quantizer import AscendQuantizer -@register_quantization_config(ASCEND_QUATIZATION_METHOD) +@register_quantization_config(ASCEND_QUANTIZATION_METHOD) class AscendQuantConfig(QuantizationConfig): """Config class for Ascend @@ -57,7 +57,7 @@ class AscendQuantConfig(QuantizationConfig): @classmethod def get_name(cls) -> str: - return ASCEND_QUATIZATION_METHOD + return ASCEND_QUANTIZATION_METHOD @classmethod def get_supported_act_dtypes(cls) -> List[torch.dtype]: @@ -80,7 +80,7 @@ class AscendQuantConfig(QuantizationConfig): def override_quantization_method(cls, hf_quant_cfg, user_quant) -> Optional[str]: if torch.npu.is_available(): - return ASCEND_QUATIZATION_METHOD + return ASCEND_QUANTIZATION_METHOD return None def get_quant_method(self, layer: torch.nn.Module, diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index b36875e..cd1e118 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -45,7 +45,7 @@ else: # Maximum number of graphs that can be captured by ACL Graph MAX_CAPTURE_SIZE = 1920 -ASCEND_QUATIZATION_METHOD = "ascend" +ASCEND_QUANTIZATION_METHOD = "ascend" SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"] ACL_FORMAT_FRACTAL_ND = 2