[Main][Refactor]Change ASCEND_QUATIZATION_METHOD to ASCEND_QUANTIZATION_METHOD (#2517)
### What this PR does / why we need it?
The constant ASCEND_QUATIZATION_METHOD in vllm_ascend/utils.py is
misspelled and should be corrected to ASCEND_QUANTIZATION_METHOD.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI passed with newly added and existing tests.
- vLLM version: v0.10.1.1
- vLLM main: c9abb10489
Signed-off-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
Co-authored-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
This commit is contained in:
@@ -10,8 +10,7 @@ from vllm.model_executor.layers.linear import (LinearBase,
|
|||||||
from tests.ut.base import TestBase
|
from tests.ut.base import TestBase
|
||||||
from vllm_ascend.quantization.quant_config import (AscendKVCacheMethod,
|
from vllm_ascend.quantization.quant_config import (AscendKVCacheMethod,
|
||||||
AscendQuantConfig)
|
AscendQuantConfig)
|
||||||
|
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||||
ASCEND_QUATIZATION_METHOD = "ascend"
|
|
||||||
|
|
||||||
|
|
||||||
class TestAscendQuantConfig(TestBase):
|
class TestAscendQuantConfig(TestBase):
|
||||||
@@ -42,7 +41,7 @@ class TestAscendQuantConfig(TestBase):
|
|||||||
|
|
||||||
def test_get_name(self):
|
def test_get_name(self):
|
||||||
self.assertEqual(AscendQuantConfig.get_name(),
|
self.assertEqual(AscendQuantConfig.get_name(),
|
||||||
ASCEND_QUATIZATION_METHOD)
|
ASCEND_QUANTIZATION_METHOD)
|
||||||
|
|
||||||
def test_get_supported_act_dtypes(self):
|
def test_get_supported_act_dtypes(self):
|
||||||
supported_dtypes = AscendQuantConfig.get_supported_act_dtypes()
|
supported_dtypes = AscendQuantConfig.get_supported_act_dtypes()
|
||||||
@@ -66,7 +65,7 @@ class TestAscendQuantConfig(TestBase):
|
|||||||
# Test when NPU is available
|
# Test when NPU is available
|
||||||
mock_is_available.return_value = True
|
mock_is_available.return_value = True
|
||||||
result = AscendQuantConfig.override_quantization_method(None, None)
|
result = AscendQuantConfig.override_quantization_method(None, None)
|
||||||
self.assertEqual(result, ASCEND_QUATIZATION_METHOD)
|
self.assertEqual(result, ASCEND_QUANTIZATION_METHOD)
|
||||||
|
|
||||||
# Test when NPU is not available
|
# Test when NPU is not available
|
||||||
mock_is_available.return_value = False
|
mock_is_available.return_value = False
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from vllm.platforms import PlatformEnum
|
|||||||
|
|
||||||
from tests.ut.base import TestBase
|
from tests.ut.base import TestBase
|
||||||
from vllm_ascend.platform import NPUPlatform
|
from vllm_ascend.platform import NPUPlatform
|
||||||
from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD
|
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||||
|
|
||||||
|
|
||||||
class TestNPUPlatform(TestBase):
|
class TestNPUPlatform(TestBase):
|
||||||
@@ -43,7 +43,7 @@ class TestNPUPlatform(TestBase):
|
|||||||
"ASCEND_RT_VISIBLE_DEVICES")
|
"ASCEND_RT_VISIBLE_DEVICES")
|
||||||
self.assertEqual(NPUPlatform.dispatch_key, "PrivateUse1")
|
self.assertEqual(NPUPlatform.dispatch_key, "PrivateUse1")
|
||||||
self.assertEqual(NPUPlatform.supported_quantization,
|
self.assertEqual(NPUPlatform.supported_quantization,
|
||||||
[ASCEND_QUATIZATION_METHOD])
|
[ASCEND_QUANTIZATION_METHOD])
|
||||||
|
|
||||||
def test_is_sleep_mode_available(self):
|
def test_is_sleep_mode_available(self):
|
||||||
self.assertTrue(self.platform.is_sleep_mode_available())
|
self.assertTrue(self.platform.is_sleep_mode_available())
|
||||||
@@ -61,7 +61,7 @@ class TestNPUPlatform(TestBase):
|
|||||||
|
|
||||||
mock_adapt_patch.assert_called_once_with(is_global_patch=True)
|
mock_adapt_patch.assert_called_once_with(is_global_patch=True)
|
||||||
|
|
||||||
self.assertTrue(ASCEND_QUATIZATION_METHOD in mock_action.choices)
|
self.assertTrue(ASCEND_QUANTIZATION_METHOD in mock_action.choices)
|
||||||
self.assertEqual(len(mock_action.choices), 3) # original 2 + ascend
|
self.assertEqual(len(mock_action.choices), 3) # original 2 + ascend
|
||||||
|
|
||||||
@patch("vllm_ascend.utils.adapt_patch")
|
@patch("vllm_ascend.utils.adapt_patch")
|
||||||
@@ -89,7 +89,7 @@ class TestNPUPlatform(TestBase):
|
|||||||
self, mock_quant_config, mock_adapt_patch):
|
self, mock_quant_config, mock_adapt_patch):
|
||||||
mock_parser = MagicMock()
|
mock_parser = MagicMock()
|
||||||
mock_action = MagicMock()
|
mock_action = MagicMock()
|
||||||
mock_action.choices = ["awq", ASCEND_QUATIZATION_METHOD]
|
mock_action.choices = ["awq", ASCEND_QUANTIZATION_METHOD]
|
||||||
mock_parser._option_string_actions = {"--quantization": mock_action}
|
mock_parser._option_string_actions = {"--quantization": mock_action}
|
||||||
|
|
||||||
self.platform.pre_register_and_update(mock_parser)
|
self.platform.pre_register_and_update(mock_parser)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ from vllm.platforms import Platform, PlatformEnum
|
|||||||
|
|
||||||
from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
|
from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
|
||||||
init_ascend_config)
|
init_ascend_config)
|
||||||
from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, is_310p,
|
from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD, is_310p,
|
||||||
update_aclgraph_sizes)
|
update_aclgraph_sizes)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -50,7 +50,7 @@ class NPUPlatform(Platform):
|
|||||||
device_control_env_var: str = "ASCEND_RT_VISIBLE_DEVICES"
|
device_control_env_var: str = "ASCEND_RT_VISIBLE_DEVICES"
|
||||||
dispatch_key: str = "PrivateUse1"
|
dispatch_key: str = "PrivateUse1"
|
||||||
|
|
||||||
supported_quantization: list[str] = [ASCEND_QUATIZATION_METHOD]
|
supported_quantization: list[str] = [ASCEND_QUANTIZATION_METHOD]
|
||||||
|
|
||||||
def is_sleep_mode_available(self) -> bool:
|
def is_sleep_mode_available(self) -> bool:
|
||||||
return True
|
return True
|
||||||
@@ -70,8 +70,8 @@ class NPUPlatform(Platform):
|
|||||||
quant_action = parser._option_string_actions.get('--quantization')
|
quant_action = parser._option_string_actions.get('--quantization')
|
||||||
if quant_action and hasattr(quant_action,
|
if quant_action and hasattr(quant_action,
|
||||||
'choices') and quant_action.choices:
|
'choices') and quant_action.choices:
|
||||||
if ASCEND_QUATIZATION_METHOD not in quant_action.choices:
|
if ASCEND_QUANTIZATION_METHOD not in quant_action.choices:
|
||||||
quant_action.choices.append(ASCEND_QUATIZATION_METHOD)
|
quant_action.choices.append(ASCEND_QUANTIZATION_METHOD)
|
||||||
|
|
||||||
from vllm_ascend.quantization.quant_config import \
|
from vllm_ascend.quantization.quant_config import \
|
||||||
AscendQuantConfig # noqa: F401
|
AscendQuantConfig # noqa: F401
|
||||||
|
|||||||
@@ -36,12 +36,12 @@ from vllm.model_executor.parameter import PerTensorScaleParameter
|
|||||||
from vllm.model_executor.utils import set_weight_attrs
|
from vllm.model_executor.utils import set_weight_attrs
|
||||||
|
|
||||||
from vllm_ascend.ops.fused_moe import AscendUnquantizedFusedMoEMethod
|
from vllm_ascend.ops.fused_moe import AscendUnquantizedFusedMoEMethod
|
||||||
from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD
|
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||||
|
|
||||||
from .quantizer import AscendQuantizer
|
from .quantizer import AscendQuantizer
|
||||||
|
|
||||||
|
|
||||||
@register_quantization_config(ASCEND_QUATIZATION_METHOD)
|
@register_quantization_config(ASCEND_QUANTIZATION_METHOD)
|
||||||
class AscendQuantConfig(QuantizationConfig):
|
class AscendQuantConfig(QuantizationConfig):
|
||||||
"""Config class for Ascend
|
"""Config class for Ascend
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ class AscendQuantConfig(QuantizationConfig):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_name(cls) -> str:
|
def get_name(cls) -> str:
|
||||||
return ASCEND_QUATIZATION_METHOD
|
return ASCEND_QUANTIZATION_METHOD
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_act_dtypes(cls) -> List[torch.dtype]:
|
def get_supported_act_dtypes(cls) -> List[torch.dtype]:
|
||||||
@@ -80,7 +80,7 @@ class AscendQuantConfig(QuantizationConfig):
|
|||||||
def override_quantization_method(cls, hf_quant_cfg,
|
def override_quantization_method(cls, hf_quant_cfg,
|
||||||
user_quant) -> Optional[str]:
|
user_quant) -> Optional[str]:
|
||||||
if torch.npu.is_available():
|
if torch.npu.is_available():
|
||||||
return ASCEND_QUATIZATION_METHOD
|
return ASCEND_QUANTIZATION_METHOD
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_quant_method(self, layer: torch.nn.Module,
|
def get_quant_method(self, layer: torch.nn.Module,
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ else:
|
|||||||
# Maximum number of graphs that can be captured by ACL Graph
|
# Maximum number of graphs that can be captured by ACL Graph
|
||||||
MAX_CAPTURE_SIZE = 1920
|
MAX_CAPTURE_SIZE = 1920
|
||||||
|
|
||||||
ASCEND_QUATIZATION_METHOD = "ascend"
|
ASCEND_QUANTIZATION_METHOD = "ascend"
|
||||||
SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"]
|
SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"]
|
||||||
|
|
||||||
ACL_FORMAT_FRACTAL_ND = 2
|
ACL_FORMAT_FRACTAL_ND = 2
|
||||||
|
|||||||
Reference in New Issue
Block a user