[CI] Make test_platform UT stable (#2696)
Make test_platform stable
- vLLM version: v0.10.1.1
- vLLM main:
56d04089ef
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -18,23 +18,29 @@ from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
|
||||
|
||||
class TestNPUPlatform(TestBase):
|
||||
|
||||
@staticmethod
|
||||
def mock_vllm_config():
|
||||
mock_vllm_config = MagicMock()
|
||||
mock_vllm_config.compilation_config = MagicMock()
|
||||
mock_vllm_config.model_config = MagicMock()
|
||||
mock_vllm_config.parallel_config = MagicMock()
|
||||
mock_vllm_config.cache_config = MagicMock()
|
||||
mock_vllm_config.scheduler_config = MagicMock()
|
||||
mock_vllm_config.speculative_config = None
|
||||
mock_vllm_config.compilation_config.pass_config.enable_sequence_parallelism = False
|
||||
mock_vllm_config.compilation_config.cudagraph_mode = None
|
||||
return mock_vllm_config
|
||||
|
||||
@staticmethod
|
||||
def mock_vllm_ascend_config():
|
||||
mock_ascend_config = MagicMock()
|
||||
mock_ascend_config.torchair_graph_config.enabled = False
|
||||
mock_ascend_config.ascend_scheduler_config.enabled = False
|
||||
return mock_ascend_config
|
||||
|
||||
def setUp(self):
|
||||
self.platform = NPUPlatform()
|
||||
|
||||
self.mock_vllm_config = MagicMock()
|
||||
self.mock_vllm_config.compilation_config = MagicMock()
|
||||
self.mock_vllm_config.model_config = MagicMock()
|
||||
self.mock_vllm_config.parallel_config = MagicMock()
|
||||
self.mock_vllm_config.cache_config = MagicMock()
|
||||
self.mock_vllm_config.scheduler_config = MagicMock()
|
||||
self.mock_vllm_config.speculative_config = None
|
||||
self.mock_vllm_config.compilation_config.pass_config.enable_sequence_parallelism = False
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode = None
|
||||
|
||||
self.mock_ascend_config = MagicMock()
|
||||
self.mock_ascend_config.torchair_graph_config.enabled = False
|
||||
self.mock_ascend_config.ascend_scheduler_config.enabled = False
|
||||
|
||||
def test_class_variables(self):
|
||||
self.assertEqual(NPUPlatform._enum, PlatformEnum.OOT)
|
||||
self.assertEqual(NPUPlatform.device_name, "npu")
|
||||
@@ -241,8 +247,10 @@ class TestNPUPlatform(TestBase):
|
||||
def test_check_and_update_config_basic_config_update(
|
||||
self, mock_is_310p, mock_update_acl, mock_init_ascend,
|
||||
mock_check_ascend):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.parallel_config.enable_expert_parallel = False
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.parallel_config.enable_expert_parallel = False
|
||||
|
||||
# Use importlib.reload to reload the platform module, ensuring the mocked init_ascend_config method is used.
|
||||
# Without this reload, when calling self.platform.check_and_update_config,
|
||||
@@ -251,9 +259,9 @@ class TestNPUPlatform(TestBase):
|
||||
|
||||
importlib.reload(platform)
|
||||
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
|
||||
mock_init_ascend.assert_called_once_with(self.mock_vllm_config)
|
||||
mock_init_ascend.assert_called_once_with(vllm_config)
|
||||
mock_check_ascend.assert_called_once()
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@@ -261,65 +269,67 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_no_model_config_warning(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.model_config = None
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config = None
|
||||
|
||||
with self.assertLogs(logger="vllm", level="WARNING") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertTrue("Model config is missing" in cm.output[0])
|
||||
|
||||
@pytest.mark.skip(
|
||||
"CI error, Carry out the rectification uniformly at other times")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_enforce_eager_mode(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.model_config.enforce_eager = True
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.enforce_eager = True
|
||||
|
||||
with self.assertLogs(logger="vllm", level="INFO") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertTrue("Compilation disabled, using eager mode by default" in
|
||||
cm.output[0])
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@pytest.mark.skip(
|
||||
"CI error, Carry out the rectification uniformly at other times")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_unsupported_compilation_level(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.model_config.enforce_eager = False
|
||||
self.mock_vllm_config.compilation_config.level = CompilationLevel.DYNAMO_ONCE
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.enforce_eager = False
|
||||
vllm_config.compilation_config.level = CompilationLevel.DYNAMO_ONCE
|
||||
|
||||
with self.assertLogs(logger="vllm", level="WARNING") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertTrue("NPU does not support" in cm.output[0])
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@@ -330,24 +340,26 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_unsupported_cudagraph_mode(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.model_config.enforce_eager = False
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode = CUDAGraphMode.FULL
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.enforce_eager = False
|
||||
vllm_config.compilation_config.cudagraph_mode = CUDAGraphMode.FULL
|
||||
|
||||
with self.assertLogs(logger="vllm", level="INFO") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertTrue(
|
||||
"cudagraph_mode is not support on NPU. falling back to NONE" in
|
||||
cm.output[0])
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@@ -356,26 +368,28 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_disable_aclgraph_when_ray_enabled(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.model_config.enforce_eager = False
|
||||
self.mock_vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
|
||||
self.mock_vllm_config.parallel_config.distributed_executor_backend = "ray"
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.enforce_eager = False
|
||||
vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
|
||||
vllm_config.parallel_config.distributed_executor_backend = "ray"
|
||||
|
||||
with self.assertLogs(logger="vllm", level="WARNING") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
print(30 * "=", f"cm.output: {cm.output}")
|
||||
self.assertTrue(
|
||||
"Ray distributed executor backend is not compatible with ACL Graph mode"
|
||||
in cm.output[0])
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@@ -384,23 +398,25 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_torchair_enabled_compilation(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
self.mock_ascend_config.torchair_graph_config.enabled = True
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.model_config.enforce_eager = False
|
||||
self.mock_vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
|
||||
mock_ascend_config = TestNPUPlatform.mock_vllm_ascend_config()
|
||||
mock_ascend_config.torchair_graph_config.enabled = True
|
||||
mock_init_ascend.return_value = mock_ascend_config
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.enforce_eager = False
|
||||
vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
|
||||
|
||||
with self.assertLogs(logger="vllm", level="INFO") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertTrue("Torchair compilation enabled" in cm.output[0])
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@@ -409,43 +425,47 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_cache_config_block_size(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.cache_config.block_size = None
|
||||
self.mock_vllm_config.cache_config.enable_prefix_caching = True
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.cache_config.block_size = None
|
||||
vllm_config.cache_config.enable_prefix_caching = True
|
||||
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
|
||||
self.assertEqual(self.mock_vllm_config.cache_config.block_size, 128)
|
||||
self.assertEqual(vllm_config.cache_config.block_size, 128)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_v1_worker_class_selection(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.parallel_config.worker_cls = "auto"
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.parallel_config.worker_cls = "auto"
|
||||
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.parallel_config.worker_cls,
|
||||
vllm_config.parallel_config.worker_cls,
|
||||
"vllm_ascend.worker.worker_v1.NPUWorker",
|
||||
)
|
||||
|
||||
test_ascend_config = self.mock_ascend_config
|
||||
test_ascend_config = TestNPUPlatform.mock_vllm_ascend_config()
|
||||
test_ascend_config.torchair_graph_config.enabled = True
|
||||
mock_init_ascend.return_value = test_ascend_config
|
||||
self.mock_vllm_config.parallel_config.worker_cls = "auto"
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
vllm_config.parallel_config.worker_cls = "auto"
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.parallel_config.worker_cls,
|
||||
vllm_config.parallel_config.worker_cls,
|
||||
"vllm_ascend.torchair.torchair_worker.NPUTorchairWorker",
|
||||
)
|
||||
|
||||
@@ -454,31 +474,35 @@ class TestNPUPlatform(TestBase):
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=True)
|
||||
def test_check_and_update_config_310p_no_custom_ops(
|
||||
self, mock_is_310p, mock_init_ascend, mock_check_ascend):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.compilation_config.custom_ops = []
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.compilation_config.custom_ops = []
|
||||
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.assertEqual(self.mock_vllm_config.compilation_config.custom_ops,
|
||||
[])
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
self.assertEqual(vllm_config.compilation_config.custom_ops, [])
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_ascend_scheduler_config(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
self.mock_ascend_config.ascend_scheduler_config.enabled = True
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
mock_ascend_config = TestNPUPlatform.mock_vllm_ascend_config()
|
||||
mock_ascend_config.ascend_scheduler_config.enabled = True
|
||||
mock_init_ascend.return_value = mock_ascend_config
|
||||
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
|
||||
with patch("vllm_ascend.core.schedule_config.AscendSchedulerConfig"
|
||||
) as mock_scheduler:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
mock_scheduler.initialize_from_config.assert_called_once()
|
||||
|
||||
@patch('vllm_ascend.platform.get_ascend_config')
|
||||
|
||||
Reference in New Issue
Block a user