From e332e27ec306a449bfd4fb207a545eb53a3194fe Mon Sep 17 00:00:00 2001
From: CodeCat <43676926+ForBetterCodeNine@users.noreply.github.com>
Date: Fri, 21 Nov 2025 16:33:34 +0800
Subject: [PATCH] [Test] Add ut test for torchair (#4287)

### What this PR does / why we need it?
The community currently lacks unit tests (UT) for files such as
torchair_worker, mtp_proposer, and model_runner, so this PR adds UT
coverage for them.

### Does this PR introduce _any_ user-facing change?
NO

### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379

---------

Signed-off-by: CodeNine-CJ
---
 .../ut/torchair/test_torchair_model_runner.py |  45 ++++++++
 .../ut/torchair/test_torchair_mtp_proposer.py |  85 ++++++++++++++
 tests/ut/torchair/test_torchair_worker.py     | 107 ++++++++++++++++++
 3 files changed, 237 insertions(+)
 create mode 100644 tests/ut/torchair/test_torchair_model_runner.py
 create mode 100644 tests/ut/torchair/test_torchair_mtp_proposer.py
 create mode 100644 tests/ut/torchair/test_torchair_worker.py

diff --git a/tests/ut/torchair/test_torchair_model_runner.py b/tests/ut/torchair/test_torchair_model_runner.py
new file mode 100644
index 00000000..a11726f2
--- /dev/null
+++ b/tests/ut/torchair/test_torchair_model_runner.py
@@ -0,0 +1,45 @@
+from unittest.mock import MagicMock, Mock
+
+import pytest
+import torch
+from pytest_mock import MockerFixture
+from vllm.config import VllmConfig
+
+from tests.ut.base import PytestBase
+from vllm_ascend.torchair.torchair_model_runner import NPUTorchairModelRunner
+
+
+class TestNPUTorchairModelRunner(PytestBase):
+
+    @pytest.fixture
+    def setup_npu_torchair_model_runner(self, mocker: MockerFixture):
+        mocker.patch.object(NPUTorchairModelRunner, "__init__",
+                            lambda self, *args, **kwargs: None)
+        runner = NPUTorchairModelRunner(Mock(), Mock())
+
+        runner.device = torch.device("cpu")
+        runner.vllm_config = MagicMock(spec=VllmConfig)
+
+        runner.speculative_config = MagicMock(
+            method="deepseek_mtp",
+            num_speculative_tokens=4,
+            disable_padded_drafter_batch=False)
+
+        runner.ascend_config = MagicMock(enable_shared_expert_dp=False,
+                                         torchair_graph_config=MagicMock(
+                                             use_cached_graph=True,
+                                             graph_batch_sizes=[1, 2, 4]))
+
+        runner.decode_token_per_req = 2
+        runner.is_kv_consumer = True
+        runner.max_num_reqs = 100
+
+        runner.model_config = MagicMock(hf_config=MagicMock(index_topk=2))
+        runner.attn_backend = MagicMock(get_builder_cls=lambda: Mock())
+
+        return runner
+
+    def test_init(self, mocker: MockerFixture,
+                  setup_npu_torchair_model_runner):
+        runner = setup_npu_torchair_model_runner
+        assert isinstance(runner, NPUTorchairModelRunner)
diff --git a/tests/ut/torchair/test_torchair_mtp_proposer.py b/tests/ut/torchair/test_torchair_mtp_proposer.py
new file mode 100644
index 00000000..fdafce3f
--- /dev/null
+++ b/tests/ut/torchair/test_torchair_mtp_proposer.py
@@ -0,0 +1,85 @@
+from unittest.mock import MagicMock, Mock
+
+import pytest
+import torch
+from pytest_mock import MockerFixture
+from vllm.config import CacheConfig, VllmConfig
+
+from tests.ut.base import PytestBase
+from vllm_ascend.torchair.torchair_mtp_proposer import TorchairMtpProposer
+from vllm_ascend.utils import vllm_version_is
+
+
+class TestTorchairMtpProposer(PytestBase):
+
+    @pytest.fixture
+    def setup_torchair_mtp_proposer(self, mocker: MockerFixture):
+        vllm_config = MagicMock(spec=VllmConfig)
+        vllm_config.device_config = MagicMock()
+        vllm_config.device_config.device = torch.device("cpu")
torch.device("cpu") + vllm_config.speculative_config = MagicMock() + vllm_config.speculative_config.draft_model_config = MagicMock() + vllm_config.speculative_config.draft_model_config.dtype = torch.float16 + vllm_config.speculative_config.method = "deepseek_mtp" + vllm_config.speculative_config.num_speculative_tokens = 5 + vllm_config.load_config = MagicMock() + cache_config = CacheConfig(block_size=16) + vllm_config.cache_config = cache_config + vllm_config.scheduler_config = MagicMock(max_num_batched_tokens=1024, + max_num_seqs=64) + + device = torch.device("cpu") + runner = MagicMock() + runner.pcp_size = 1 + runner.dcp_size = 1 + runner.pcp_rank = 0 + runner.max_num_tokens = 1024 + runner.max_num_reqs = 10 + runner._use_aclgraph.return_value = True + + mocker.patch( + "vllm_ascend.torchair.torchair_mtp_proposer.MtpProposer.__init__", + return_value=None) + + if vllm_version_is("0.11.0"): + mock_set_default_dtype = mocker.patch( + 'vllm.model_executor.model_loader.utils.set_default_torch_dtype' + ) + else: + mock_set_default_dtype = mocker.patch( + 'vllm.utils.torch_utils.set_default_torch_dtype') + mock_set_default_dtype.return_value.__enter__.return_value = None + + mock_model_loader = MagicMock() + mocker.patch("vllm.model_executor.model_loader.get_model_loader", + return_value=mock_model_loader) + mock_layers = { + "target_attn_layer_1": Mock(), + "draft_attn_layer_2": Mock() + } + mocker.patch("vllm.config.get_layers_from_vllm_config", + return_value=mock_layers) + mock_set_current = mocker.patch("vllm.config.set_current_vllm_config") + mock_set_current.return_value.__enter__.return_value = None + mock_torchair_deepseek_mtp = MagicMock() + mock_torchair_deepseek_mtp.to.return_value = mock_torchair_deepseek_mtp + mocker.patch( + "vllm_ascend.torchair.models.torchair_deepseek_mtp.TorchairDeepSeekMTP", + return_value=mock_torchair_deepseek_mtp) + mocker.patch( + "vllm.model_executor.model_loader.utils.process_weights_after_loading" + ) + + proposer = TorchairMtpProposer(vllm_config, device, runner) + proposer.vllm_config = vllm_config + proposer.device = device + proposer.runner = runner + proposer.speculative_config = vllm_config.speculative_config + proposer.draft_model_config = vllm_config.speculative_config.draft_model_config + proposer.method = vllm_config.speculative_config.method + + return proposer, mock_model_loader, mock_torchair_deepseek_mtp + + def test_init(self, setup_torchair_mtp_proposer): + proposer, _, _, = setup_torchair_mtp_proposer + assert isinstance(proposer, TorchairMtpProposer) diff --git a/tests/ut/torchair/test_torchair_worker.py b/tests/ut/torchair/test_torchair_worker.py new file mode 100644 index 00000000..74a85179 --- /dev/null +++ b/tests/ut/torchair/test_torchair_worker.py @@ -0,0 +1,107 @@ +from unittest.mock import MagicMock, patch + +import torch +from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig + +from tests.ut.base import TestBase +from vllm_ascend.utils import vllm_version_is + +init_cache_hf_modules_path = "vllm.utils.init_cached_hf_modules" if vllm_version_is( + "0.11.0") else "vllm.utils.import_utils.init_cached_hf_modules" + + +class TestNPUTorchairWorker(TestBase): + + def setUp(self): + self.cache_config_mock = MagicMock(spec=CacheConfig) + self.cache_config_mock.cache_type = "auto" + + self.model_config_mock = MagicMock(spec=ModelConfig) + self.model_config_mock.dtype = torch.float16 + self.model_config_mock.trust_remote_code = False + + self.hf_config_mock = MagicMock() + self.hf_config_mock.model_type = 
"test_model" + if hasattr(self.hf_config_mock, 'index_topk'): + delattr(self.hf_config_mock, 'index_topk') + + self.model_config_mock.hf_config = self.hf_config_mock + + self.parallel_config_mock = MagicMock(spec=ParallelConfig) + + self.vllm_config_mock = MagicMock(spec=VllmConfig) + self.vllm_config_mock.cache_config = self.cache_config_mock + self.vllm_config_mock.model_config = self.model_config_mock + self.vllm_config_mock.parallel_config = self.parallel_config_mock + self.vllm_config_mock.additional_config = None + self.vllm_config_mock.load_config = None + self.vllm_config_mock.scheduler_config = None + self.vllm_config_mock.device_config = None + self.vllm_config_mock.compilation_config = None + + self.local_rank = 0 + self.rank = 0 + self.distributed_init_method = "tcp://localhost:12345" + self.is_driver_worker = False + + @patch( + "vllm_ascend.worker.worker_v1.NPUWorker._init_worker_distributed_environment" + ) + @patch("vllm_ascend.worker.worker_v1.NPUPlatform") + def test_init_device(self, mock_platform, mock_init_dist_env): + from vllm_ascend.worker.worker_v1 import NPUWorker + + mock_platform.mem_get_info.return_value = (1000, 2000) + + with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): + worker = NPUWorker() + worker.local_rank = 1 + worker.model_config = MagicMock() + worker.model_config.seed = 42 + worker.vllm_config = MagicMock() + + result = worker._init_device() + + mock_platform.set_device.assert_called_once() + call_args = mock_platform.set_device.call_args[0][0] + self.assertEqual(str(call_args), "npu:1") + + mock_platform.empty_cache.assert_called_once() + mock_platform.seed_everything.assert_called_once_with(42) + mock_platform.mem_get_info.assert_called_once() + mock_init_dist_env.assert_called_once() + + self.assertEqual(str(result), "npu:1") + self.assertEqual(worker.init_npu_memory, 1000) + + @patch( + "vllm_ascend.worker.worker_v1.NPUWorker._init_worker_distributed_environment" + ) + @patch("vllm_ascend.worker.worker_v1.NPUPlatform") + def test_init_device_torchair_worker(self, mock_platform, + mock_init_dist_env): + from vllm_ascend.torchair.torchair_worker import NPUTorchairWorker + + mock_platform.mem_get_info.return_value = (1000, 2000) + + with patch.object(NPUTorchairWorker, "__init__", + lambda x, **kwargs: None): + worker = NPUTorchairWorker() + worker.local_rank = 1 + worker.model_config = MagicMock() + worker.model_config.seed = 42 + worker.vllm_config = MagicMock() + + result = worker._init_device() + + mock_platform.set_device.assert_called_once() + call_args = mock_platform.set_device.call_args[0][0] + self.assertEqual(str(call_args), "npu:1") + + mock_platform.empty_cache.assert_called_once() + mock_platform.seed_everything.assert_called_once_with(42) + mock_platform.mem_get_info.assert_called_once() + mock_init_dist_env.assert_called_once() + + self.assertEqual(str(result), "npu:1") + self.assertEqual(worker.init_npu_memory, 1000)