diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py index 2b381bb..de8b9be 100644 --- a/tests/ut/test_platform.py +++ b/tests/ut/test_platform.py @@ -18,23 +18,29 @@ from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD class TestNPUPlatform(TestBase): + @staticmethod + def mock_vllm_config(): + mock_vllm_config = MagicMock() + mock_vllm_config.compilation_config = MagicMock() + mock_vllm_config.model_config = MagicMock() + mock_vllm_config.parallel_config = MagicMock() + mock_vllm_config.cache_config = MagicMock() + mock_vllm_config.scheduler_config = MagicMock() + mock_vllm_config.speculative_config = None + mock_vllm_config.compilation_config.pass_config.enable_sequence_parallelism = False + mock_vllm_config.compilation_config.cudagraph_mode = None + return mock_vllm_config + + @staticmethod + def mock_vllm_ascend_config(): + mock_ascend_config = MagicMock() + mock_ascend_config.torchair_graph_config.enabled = False + mock_ascend_config.ascend_scheduler_config.enabled = False + return mock_ascend_config + def setUp(self): self.platform = NPUPlatform() - self.mock_vllm_config = MagicMock() - self.mock_vllm_config.compilation_config = MagicMock() - self.mock_vllm_config.model_config = MagicMock() - self.mock_vllm_config.parallel_config = MagicMock() - self.mock_vllm_config.cache_config = MagicMock() - self.mock_vllm_config.scheduler_config = MagicMock() - self.mock_vllm_config.speculative_config = None - self.mock_vllm_config.compilation_config.pass_config.enable_sequence_parallelism = False - self.mock_vllm_config.compilation_config.cudagraph_mode = None - - self.mock_ascend_config = MagicMock() - self.mock_ascend_config.torchair_graph_config.enabled = False - self.mock_ascend_config.ascend_scheduler_config.enabled = False - def test_class_variables(self): self.assertEqual(NPUPlatform._enum, PlatformEnum.OOT) self.assertEqual(NPUPlatform.device_name, "npu") @@ -241,8 +247,10 @@ class TestNPUPlatform(TestBase): def test_check_and_update_config_basic_config_update( self, mock_is_310p, mock_update_acl, mock_init_ascend, mock_check_ascend): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.parallel_config.enable_expert_parallel = False + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.parallel_config.enable_expert_parallel = False # Use importlib.reload to reload the platform module, ensuring the mocked init_ascend_config method is used. # Without this reload, when calling self.platform.check_and_update_config, @@ -251,9 +259,9 @@ class TestNPUPlatform(TestBase): importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) - mock_init_ascend.assert_called_once_with(self.mock_vllm_config) + mock_init_ascend.assert_called_once_with(vllm_config) mock_check_ascend.assert_called_once() @patch("vllm_ascend.utils.is_310p", return_value=False) @@ -261,65 +269,67 @@ class TestNPUPlatform(TestBase): @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_no_model_config_warning( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.model_config = None + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.model_config = None with self.assertLogs(logger="vllm", level="WARNING") as cm: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) self.assertTrue("Model config is missing" in cm.output[0]) - @pytest.mark.skip( - "CI error, Carry out the rectification uniformly at other times") @patch("vllm_ascend.utils.is_310p", return_value=False) @patch("vllm_ascend.ascend_config.check_ascend_config") @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_enforce_eager_mode( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.model_config.enforce_eager = True + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.model_config.enforce_eager = True with self.assertLogs(logger="vllm", level="INFO") as cm: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) self.assertTrue("Compilation disabled, using eager mode by default" in cm.output[0]) self.assertEqual( - self.mock_vllm_config.compilation_config.level, + vllm_config.compilation_config.level, CompilationLevel.NO_COMPILATION, ) self.assertEqual( - self.mock_vllm_config.compilation_config.cudagraph_mode, + vllm_config.compilation_config.cudagraph_mode, CUDAGraphMode.NONE, ) - @pytest.mark.skip( - "CI error, Carry out the rectification uniformly at other times") @patch("vllm_ascend.utils.is_310p", return_value=False) @patch("vllm_ascend.ascend_config.check_ascend_config") @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_unsupported_compilation_level( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.model_config.enforce_eager = False - self.mock_vllm_config.compilation_config.level = CompilationLevel.DYNAMO_ONCE + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.model_config.enforce_eager = False + vllm_config.compilation_config.level = CompilationLevel.DYNAMO_ONCE with self.assertLogs(logger="vllm", level="WARNING") as cm: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) self.assertTrue("NPU does not support" in cm.output[0]) self.assertEqual( - self.mock_vllm_config.compilation_config.level, + vllm_config.compilation_config.level, CompilationLevel.NO_COMPILATION, ) self.assertEqual( - self.mock_vllm_config.compilation_config.cudagraph_mode, + vllm_config.compilation_config.cudagraph_mode, CUDAGraphMode.NONE, ) @@ -330,24 +340,26 @@ class TestNPUPlatform(TestBase): @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_unsupported_cudagraph_mode( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.model_config.enforce_eager = False - self.mock_vllm_config.compilation_config.cudagraph_mode = CUDAGraphMode.FULL + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.model_config.enforce_eager = False + vllm_config.compilation_config.cudagraph_mode = CUDAGraphMode.FULL with self.assertLogs(logger="vllm", level="INFO") as cm: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) self.assertTrue( "cudagraph_mode is not support on NPU. falling back to NONE" in cm.output[0]) self.assertEqual( - self.mock_vllm_config.compilation_config.level, + vllm_config.compilation_config.level, CompilationLevel.NO_COMPILATION, ) self.assertEqual( - self.mock_vllm_config.compilation_config.cudagraph_mode, + vllm_config.compilation_config.cudagraph_mode, CUDAGraphMode.NONE, ) @@ -356,26 +368,28 @@ class TestNPUPlatform(TestBase): @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_disable_aclgraph_when_ray_enabled( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.model_config.enforce_eager = False - self.mock_vllm_config.compilation_config.level = CompilationLevel.PIECEWISE - self.mock_vllm_config.parallel_config.distributed_executor_backend = "ray" + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.model_config.enforce_eager = False + vllm_config.compilation_config.level = CompilationLevel.PIECEWISE + vllm_config.parallel_config.distributed_executor_backend = "ray" with self.assertLogs(logger="vllm", level="WARNING") as cm: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) print(30 * "=", f"cm.output: {cm.output}") self.assertTrue( "Ray distributed executor backend is not compatible with ACL Graph mode" in cm.output[0]) self.assertEqual( - self.mock_vllm_config.compilation_config.level, + vllm_config.compilation_config.level, CompilationLevel.NO_COMPILATION, ) self.assertEqual( - self.mock_vllm_config.compilation_config.cudagraph_mode, + vllm_config.compilation_config.cudagraph_mode, CUDAGraphMode.NONE, ) @@ -384,23 +398,25 @@ class TestNPUPlatform(TestBase): @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_torchair_enabled_compilation( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - self.mock_ascend_config.torchair_graph_config.enabled = True - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.model_config.enforce_eager = False - self.mock_vllm_config.compilation_config.level = CompilationLevel.PIECEWISE + mock_ascend_config = TestNPUPlatform.mock_vllm_ascend_config() + mock_ascend_config.torchair_graph_config.enabled = True + mock_init_ascend.return_value = mock_ascend_config + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.model_config.enforce_eager = False + vllm_config.compilation_config.level = CompilationLevel.PIECEWISE with self.assertLogs(logger="vllm", level="INFO") as cm: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) self.assertTrue("Torchair compilation enabled" in cm.output[0]) self.assertEqual( - self.mock_vllm_config.compilation_config.level, + vllm_config.compilation_config.level, CompilationLevel.NO_COMPILATION, ) self.assertEqual( - self.mock_vllm_config.compilation_config.cudagraph_mode, + vllm_config.compilation_config.cudagraph_mode, CUDAGraphMode.NONE, ) @@ -409,43 +425,47 @@ class TestNPUPlatform(TestBase): @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_cache_config_block_size( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.cache_config.block_size = None - self.mock_vllm_config.cache_config.enable_prefix_caching = True + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.cache_config.block_size = None + vllm_config.cache_config.enable_prefix_caching = True from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) - self.assertEqual(self.mock_vllm_config.cache_config.block_size, 128) + self.assertEqual(vllm_config.cache_config.block_size, 128) @patch("vllm_ascend.utils.is_310p", return_value=False) @patch("vllm_ascend.ascend_config.check_ascend_config") @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_v1_worker_class_selection( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.parallel_config.worker_cls = "auto" + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.parallel_config.worker_cls = "auto" from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) self.assertEqual( - self.mock_vllm_config.parallel_config.worker_cls, + vllm_config.parallel_config.worker_cls, "vllm_ascend.worker.worker_v1.NPUWorker", ) - test_ascend_config = self.mock_ascend_config + test_ascend_config = TestNPUPlatform.mock_vllm_ascend_config() test_ascend_config.torchair_graph_config.enabled = True mock_init_ascend.return_value = test_ascend_config - self.mock_vllm_config.parallel_config.worker_cls = "auto" - self.platform.check_and_update_config(self.mock_vllm_config) + vllm_config.parallel_config.worker_cls = "auto" + self.platform.check_and_update_config(vllm_config) self.assertEqual( - self.mock_vllm_config.parallel_config.worker_cls, + vllm_config.parallel_config.worker_cls, "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker", ) @@ -454,31 +474,35 @@ class TestNPUPlatform(TestBase): @patch("vllm_ascend.utils.is_310p", return_value=True) def test_check_and_update_config_310p_no_custom_ops( self, mock_is_310p, mock_init_ascend, mock_check_ascend): - mock_init_ascend.return_value = self.mock_ascend_config - self.mock_vllm_config.compilation_config.custom_ops = [] + mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( + ) + vllm_config = TestNPUPlatform.mock_vllm_config() + vllm_config.compilation_config.custom_ops = [] from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) - self.assertEqual(self.mock_vllm_config.compilation_config.custom_ops, - []) + self.platform.check_and_update_config(vllm_config) + self.assertEqual(vllm_config.compilation_config.custom_ops, []) @patch("vllm_ascend.utils.is_310p", return_value=False) @patch("vllm_ascend.ascend_config.check_ascend_config") @patch("vllm_ascend.ascend_config.init_ascend_config") def test_check_and_update_config_ascend_scheduler_config( self, mock_init_ascend, mock_check_ascend, mock_is_310p): - self.mock_ascend_config.ascend_scheduler_config.enabled = True - mock_init_ascend.return_value = self.mock_ascend_config + mock_ascend_config = TestNPUPlatform.mock_vllm_ascend_config() + mock_ascend_config.ascend_scheduler_config.enabled = True + mock_init_ascend.return_value = mock_ascend_config + + vllm_config = TestNPUPlatform.mock_vllm_config() with patch("vllm_ascend.core.schedule_config.AscendSchedulerConfig" ) as mock_scheduler: from vllm_ascend import platform importlib.reload(platform) - self.platform.check_and_update_config(self.mock_vllm_config) + self.platform.check_and_update_config(vllm_config) mock_scheduler.initialize_from_config.assert_called_once() @patch('vllm_ascend.platform.get_ascend_config')