[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
2918c1b49c
Signed-off-by: zzzzwwjj <1183291235@qq.com>
This commit is contained in:
@@ -9,7 +9,7 @@ from vllm.platforms import PlatformEnum
|
||||
|
||||
from tests.ut.base import TestBase
|
||||
from vllm_ascend.platform import NPUPlatform
|
||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||
from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, AscendDeviceType
|
||||
|
||||
|
||||
class TestNPUPlatform(TestBase):
|
||||
@@ -231,13 +231,14 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm_ascend.utils.update_aclgraph_sizes")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("os.environ", {})
|
||||
@patch(
|
||||
"vllm_ascend.core.recompute_schedule_config.RecomputeSchedulerConfig.initialize_from_config"
|
||||
)
|
||||
def test_check_and_update_config_basic_config_update(
|
||||
self, mock_init_recompute, mock_is_310p, mock_update_acl,
|
||||
self, mock_init_recompute, mock_soc_version, mock_update_acl,
|
||||
mock_init_ascend, mock_check_ascend):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
@@ -259,7 +260,8 @@ class TestNPUPlatform(TestBase):
|
||||
mock_init_ascend.assert_called_once_with(vllm_config)
|
||||
mock_check_ascend.assert_called_once()
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch(
|
||||
@@ -267,7 +269,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_no_model_config_warning(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_is_310p):
|
||||
mock_soc_version):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
@@ -283,7 +285,8 @@ class TestNPUPlatform(TestBase):
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertTrue("Model config is missing" in cm.output[0])
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch(
|
||||
@@ -291,7 +294,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_enforce_eager_mode(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_is_310p):
|
||||
mock_soc_version):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
@@ -318,7 +321,8 @@ class TestNPUPlatform(TestBase):
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.utils.update_default_aclgraph_sizes")
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@@ -327,7 +331,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_unsupported_compilation_level(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_update_default, mock_is_310p):
|
||||
mock_update_default, mock_soc_version):
|
||||
mock_update_default.return_value = MagicMock()
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
@@ -357,11 +361,12 @@ class TestNPUPlatform(TestBase):
|
||||
|
||||
@pytest.mark.skip(
|
||||
"Revert me when vllm support setting cudagraph_mode on oot platform")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_unsupported_cudagraph_mode(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
self, mock_init_ascend, mock_check_ascend, mock_soc_version):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
@@ -386,7 +391,8 @@ class TestNPUPlatform(TestBase):
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.utils.update_default_aclgraph_sizes")
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@@ -395,7 +401,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_torchair_enabled_compilation(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_update_default, mock_is_310p):
|
||||
mock_update_default, mock_soc_version):
|
||||
mock_update_default.return_value = MagicMock()
|
||||
mock_ascend_config = TestNPUPlatform.mock_vllm_ascend_config()
|
||||
mock_ascend_config.torchair_graph_config.enabled = True
|
||||
@@ -424,7 +430,8 @@ class TestNPUPlatform(TestBase):
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch(
|
||||
@@ -432,7 +439,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_cache_config_block_size(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_is_310p):
|
||||
mock_soc_version):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
@@ -450,7 +457,8 @@ class TestNPUPlatform(TestBase):
|
||||
|
||||
self.assertEqual(vllm_config.cache_config.block_size, 128)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch(
|
||||
@@ -458,7 +466,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_v1_worker_class_selection(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_is_310p):
|
||||
mock_soc_version):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
@@ -489,12 +497,13 @@ class TestNPUPlatform(TestBase):
|
||||
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=True)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._310P)
|
||||
@patch(
|
||||
"vllm_ascend.core.recompute_schedule_config.RecomputeSchedulerConfig.initialize_from_config"
|
||||
)
|
||||
def test_check_and_update_config_310p_no_custom_ops(
|
||||
self, mock_init_recompute, mock_is_310p, mock_init_ascend,
|
||||
self, mock_init_recompute, mock_soc_version, mock_init_ascend,
|
||||
mock_check_ascend):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
@@ -511,7 +520,8 @@ class TestNPUPlatform(TestBase):
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertEqual(vllm_config.compilation_config.custom_ops, [])
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
return_value=AscendDeviceType._910_93)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch(
|
||||
@@ -519,7 +529,7 @@ class TestNPUPlatform(TestBase):
|
||||
)
|
||||
def test_check_and_update_config_ascend_scheduler_config(
|
||||
self, mock_init_recompute, mock_init_ascend, mock_check_ascend,
|
||||
mock_is_310p):
|
||||
mock_soc_version):
|
||||
mock_ascend_config = TestNPUPlatform.mock_vllm_ascend_config()
|
||||
mock_ascend_config.ascend_scheduler_config.enabled = True
|
||||
mock_init_ascend.return_value = mock_ascend_config
|
||||
|
||||
Reference in New Issue
Block a user