xc-llm-ascend/tests/ut/torchair/test_torchair_worker.py

from unittest.mock import MagicMock, patch

import torch
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig

from tests.ut.base import TestBase

# Dotted path to ``init_cached_hf_modules`` in ``vllm.utils.import_utils``.
# Not referenced by the tests visible in this file; presumably intended as a
# ``patch`` target -- TODO confirm whether it is still needed.
init_cache_hf_modules_path = "vllm.utils.import_utils.init_cached_hf_modules"


class TestNPUTorchairWorker(TestBase):
    """Tests for ``_init_device`` on ``NPUWorker`` and ``NPUTorchairWorker``.

    ``NPUPlatform`` and ``_init_worker_distributed_environment`` are patched
    in every test, and the worker ``__init__`` is replaced with a no-op, so
    only the calls made by ``_init_device`` itself are checked.
    """

    def setUp(self):
        """Create mocked vLLM config objects and default worker arguments."""
        self.cache_config_mock = MagicMock(spec=CacheConfig)
        self.cache_config_mock.cache_type = "auto"

        self.model_config_mock = MagicMock(spec=ModelConfig)
        self.model_config_mock.dtype = torch.float16
        self.model_config_mock.trust_remote_code = False

        self.hf_config_mock = MagicMock()
        self.hf_config_mock.model_type = "test_model"
        # A bare MagicMock fabricates any attribute on access. Deleting
        # ``index_topk`` makes later lookups raise AttributeError, so
        # ``hasattr(hf_config, "index_topk")`` is False.
        del self.hf_config_mock.index_topk

        self.model_config_mock.hf_config = self.hf_config_mock

        self.parallel_config_mock = MagicMock(spec=ParallelConfig)

        self.vllm_config_mock = MagicMock(spec=VllmConfig)
        self.vllm_config_mock.cache_config = self.cache_config_mock
        self.vllm_config_mock.model_config = self.model_config_mock
        self.vllm_config_mock.parallel_config = self.parallel_config_mock
        self.vllm_config_mock.additional_config = None
        self.vllm_config_mock.load_config = None
        self.vllm_config_mock.scheduler_config = None
        self.vllm_config_mock.device_config = None
        self.vllm_config_mock.compilation_config = None

        self.local_rank = 0
        self.rank = 0
        self.distributed_init_method = "tcp://localhost:12345"
        self.is_driver_worker = False

    def _check_init_device(self, worker_cls, mock_platform,
                           mock_init_dist_env):
        """Run ``worker_cls._init_device`` on a stub worker and verify it.

        Args:
            worker_cls: Worker class under test; its ``__init__`` is replaced
                with a no-op for the duration of the check.
            mock_platform: Mock standing in for ``NPUPlatform``.
            mock_init_dist_env: Mock standing in for
                ``_init_worker_distributed_environment``.
        """
        mock_platform.mem_get_info.return_value = (1000, 2000)

        # Skip the real constructor and set by hand only the attributes this
        # test provides to ``_init_device``.
        with patch.object(worker_cls, "__init__", lambda x, **kwargs: None):
            worker = worker_cls()
            worker.local_rank = 1
            worker.model_config = MagicMock()
            worker.model_config.seed = 42
            worker.vllm_config = MagicMock()
            worker.parallel_config = MagicMock()
            worker.parallel_config.local_world_size = 0
            worker.parallel_config.data_parallel_size = 1

            result = worker._init_device()

            # The device handed to the platform is derived from local_rank.
            mock_platform.set_device.assert_called_once()
            call_args = mock_platform.set_device.call_args[0][0]
            self.assertEqual(str(call_args), "npu:1")

            mock_platform.empty_cache.assert_called_once()
            mock_platform.seed_everything.assert_called_once_with(42)
            mock_platform.mem_get_info.assert_called_once()
            mock_init_dist_env.assert_called_once()

            self.assertEqual(str(result), "npu:1")
            # init_npu_memory equals the first element of the mocked
            # mem_get_info() result.
            self.assertEqual(worker.init_npu_memory, 1000)

    @patch(
        "vllm_ascend.worker.worker_v1.NPUWorker._init_worker_distributed_environment"
    )
    @patch("vllm_ascend.worker.worker_v1.NPUPlatform")
    def test_init_device(self, mock_platform, mock_init_dist_env):
        """``NPUWorker._init_device`` sets the device, seed and memory."""
        from vllm_ascend.worker.worker_v1 import NPUWorker

        self._check_init_device(NPUWorker, mock_platform, mock_init_dist_env)

    @patch(
        "vllm_ascend.worker.worker_v1.NPUWorker._init_worker_distributed_environment"
    )
    @patch("vllm_ascend.worker.worker_v1.NPUPlatform")
    def test_init_device_torchair_worker(self, mock_platform,
                                         mock_init_dist_env):
        """``NPUTorchairWorker._init_device`` passes the same checks."""
        from vllm_ascend.torchair.torchair_worker import NPUTorchairWorker

        self._check_init_device(NPUTorchairWorker, mock_platform,
                                mock_init_dist_env)
[Test] Add ut test for torchair (#4287) ### What this PR does / why we need it? The current community lacks unit tests (UT) for files such as torchair_worker, mtp_proposer, and model_runner. Therefore, UT coverage for these files needs to be added. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 --------- Signed-off-by: CodeNine-CJ <chenjian343@huawei.com> 2025-11-21 16:33:34 +08:00			`from unittest.mock import MagicMock, patch`

			`import torch`
			`from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig`

			`from tests.ut.base import TestBase`

Drop 0.11.0 support (#4377) There is a lot of hack code for v0.11.0, which makes it hard to upgrade to newer vLLM versions. Since v0.11.0 will release soon, let's drop v0.11.0 support first. Then we'll upgrade to v0.11.2 soon. - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2025-11-24 17:08:20 +08:00			`init_cache_hf_modules_path = "vllm.utils.import_utils.init_cached_hf_modules"`
[Test] Add ut test for torchair (#4287) ### What this PR does / why we need it? The current community lacks unit tests (UT) for files such as torchair_worker, mtp_proposer, and model_runner. Therefore, UT coverage for these files needs to be added. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 --------- Signed-off-by: CodeNine-CJ <chenjian343@huawei.com> 2025-11-21 16:33:34 +08:00

			`class TestNPUTorchairWorker(TestBase):`

			`def setUp(self):`
			`self.cache_config_mock = MagicMock(spec=CacheConfig)`
			`self.cache_config_mock.cache_type = "auto"`

			`self.model_config_mock = MagicMock(spec=ModelConfig)`
			`self.model_config_mock.dtype = torch.float16`
			`self.model_config_mock.trust_remote_code = False`

			`self.hf_config_mock = MagicMock()`
			`self.hf_config_mock.model_type = "test_model"`
			`if hasattr(self.hf_config_mock, 'index_topk'):`
			`delattr(self.hf_config_mock, 'index_topk')`

			`self.model_config_mock.hf_config = self.hf_config_mock`

			`self.parallel_config_mock = MagicMock(spec=ParallelConfig)`

			`self.vllm_config_mock = MagicMock(spec=VllmConfig)`
			`self.vllm_config_mock.cache_config = self.cache_config_mock`
			`self.vllm_config_mock.model_config = self.model_config_mock`
			`self.vllm_config_mock.parallel_config = self.parallel_config_mock`
			`self.vllm_config_mock.additional_config = None`
			`self.vllm_config_mock.load_config = None`
			`self.vllm_config_mock.scheduler_config = None`
			`self.vllm_config_mock.device_config = None`
			`self.vllm_config_mock.compilation_config = None`

			`self.local_rank = 0`
			`self.rank = 0`
			`self.distributed_init_method = "tcp://localhost:12345"`
			`self.is_driver_worker = False`

			`@patch(`
			`"vllm_ascend.worker.worker_v1.NPUWorker._init_worker_distributed_environment"`
			`)`
			`@patch("vllm_ascend.worker.worker_v1.NPUPlatform")`
			`def test_init_device(self, mock_platform, mock_init_dist_env):`
			`from vllm_ascend.worker.worker_v1 import NPUWorker`

			`mock_platform.mem_get_info.return_value = (1000, 2000)`

			`with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):`
			`worker = NPUWorker()`
			`worker.local_rank = 1`
			`worker.model_config = MagicMock()`
			`worker.model_config.seed = 42`
			`worker.vllm_config = MagicMock()`
upgrade to vllm 0.11.2 (#4400) Bump vLLM version to v0.11.2 What's broken and changed by vLLM: 1. structured_output is broken by https://github.com/vllm-project/vllm/pull/26866 2. get_mrope_input_positions is broken by https://github.com/vllm-project/vllm/pull/28399 3. graph mode is broken by https://github.com/vllm-project/vllm/pull/25110 we'll upgrade torch to 2.8 to fix the problem later 4. embedding is broken by https://github.com/vllm-project/vllm/pull/27583 5. `get_attn_backend_cls` and attention backend is broken are broken by https://github.com/vllm-project/vllm/pull/28534 6. spec decode is broken by https://github.com/vllm-project/vllm/pull/28771 7. sp feature is broken by https://github.com/vllm-project/vllm/pull/27126 8. mtp is broken by https://github.com/vllm-project/vllm/pull/27922 9. lora is broken by https://github.com/vllm-project/vllm/pull/21068 10. execute_model is broken by https://github.com/vllm-project/vllm/pull/26866 11. `VLLM_DISABLE_SHARED_EXPERTS_STREAM` env is broken by https://github.com/vllm-project/vllm/pull/28159 12. kv cahe is broken by https://github.com/vllm-project/vllm/pull/27753 13. dp is broken by https://github.com/vllm-project/vllm/pull/25110 What's broken and changed by ourself: 1. qwen vl is broken by https://github.com/vllm-project/vllm/pull/28455 We'll remove model files in the future to avoid this kind of error 2. Engine core is broken by https://github.com/vllm-project/vllm/pull/23691 We'll remove the patch file in the future. 3. Ascend scheduler is broken by https://github.com/vllm-project/vllm/pull/28733 We'll remove ascend scheudler later. 4. qwen3-next is broken by https://github.com/vllm-project/vllm/pull/28083 We'll remove model files in the future to avoid this kind of error 5. qwen vl is broken by https://github.com/vllm-project/vllm/pull/27764. We'll remove model files in the future Known issue: 1. ray doesn't work 2. the accuracy of qwen3-next is not correct 3. qwen3-vl is broken 4. 
prefix cache+ ascend scheduler + deepseek v2 lite is broken. Co-authored-by: MengqingCao <cmq0113@163.com> Co-authored-by: hfadzxy <starmoon_zhang@163.com> Co-authored-by: leo-pony <nengjunma@outlook.com> Co-authored-by: 22dimensions <waitingwind@foxmail.com> Co-authored-by: shen-shanshan <467638484@qq.com> - vLLM version: v0.11.2 --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Signed-off-by: hfadzxy <starmoon_zhang@163.com> Signed-off-by: leo-pony <nengjunma@outlook.com> Co-authored-by: MengqingCao <cmq0113@163.com> Co-authored-by: hfadzxy <starmoon_zhang@163.com> Co-authored-by: leo-pony <nengjunma@outlook.com> 2025-11-26 11:48:58 +08:00			`worker.parallel_config = MagicMock()`
			`worker.parallel_config.local_world_size = 0`
[bugfix] fix ray start failure: local_world_size cannot be less than visible device count error (#4457) ### What this PR does / why we need it? Fix the Ray start failure caused by the "local_world_size cannot be less than visible device count" error; for details see issue #4456. This fix is ported from the corresponding vLLM change, PR: [#28873](https://github.com/vllm-project/vllm/pull/28873) - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2 --------- Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-11-27 21:18:32 +08:00			`worker.parallel_config.data_parallel_size = 1`
[Test] Add ut test for torchair (#4287) ### What this PR does / why we need it? The current community lacks unit tests (UT) for files such as torchair_worker, mtp_proposer, and model_runner. Therefore, UT coverage for these files needs to be added. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 --------- Signed-off-by: CodeNine-CJ <chenjian343@huawei.com> 2025-11-21 16:33:34 +08:00
			`result = worker._init_device()`

			`mock_platform.set_device.assert_called_once()`
			`call_args = mock_platform.set_device.call_args[0][0]`
			`self.assertEqual(str(call_args), "npu:1")`

			`mock_platform.empty_cache.assert_called_once()`
			`mock_platform.seed_everything.assert_called_once_with(42)`
			`mock_platform.mem_get_info.assert_called_once()`
			`mock_init_dist_env.assert_called_once()`

			`self.assertEqual(str(result), "npu:1")`
			`self.assertEqual(worker.init_npu_memory, 1000)`

			`@patch(`
			`"vllm_ascend.worker.worker_v1.NPUWorker._init_worker_distributed_environment"`
			`)`
			`@patch("vllm_ascend.worker.worker_v1.NPUPlatform")`
			`def test_init_device_torchair_worker(self, mock_platform,`
			`mock_init_dist_env):`
			`from vllm_ascend.torchair.torchair_worker import NPUTorchairWorker`

			`mock_platform.mem_get_info.return_value = (1000, 2000)`

			`with patch.object(NPUTorchairWorker, "__init__",`
			`lambda x, **kwargs: None):`
			`worker = NPUTorchairWorker()`
			`worker.local_rank = 1`
			`worker.model_config = MagicMock()`
			`worker.model_config.seed = 42`
			`worker.vllm_config = MagicMock()`
upgrade to vllm 0.11.2 (#4400) Bump vLLM version to v0.11.2 What's broken and changed by vLLM: 1. structured_output is broken by https://github.com/vllm-project/vllm/pull/26866 2. get_mrope_input_positions is broken by https://github.com/vllm-project/vllm/pull/28399 3. graph mode is broken by https://github.com/vllm-project/vllm/pull/25110 we'll upgrade torch to 2.8 to fix the problem later 4. embedding is broken by https://github.com/vllm-project/vllm/pull/27583 5. `get_attn_backend_cls` and attention backend is broken are broken by https://github.com/vllm-project/vllm/pull/28534 6. spec decode is broken by https://github.com/vllm-project/vllm/pull/28771 7. sp feature is broken by https://github.com/vllm-project/vllm/pull/27126 8. mtp is broken by https://github.com/vllm-project/vllm/pull/27922 9. lora is broken by https://github.com/vllm-project/vllm/pull/21068 10. execute_model is broken by https://github.com/vllm-project/vllm/pull/26866 11. `VLLM_DISABLE_SHARED_EXPERTS_STREAM` env is broken by https://github.com/vllm-project/vllm/pull/28159 12. kv cahe is broken by https://github.com/vllm-project/vllm/pull/27753 13. dp is broken by https://github.com/vllm-project/vllm/pull/25110 What's broken and changed by ourself: 1. qwen vl is broken by https://github.com/vllm-project/vllm/pull/28455 We'll remove model files in the future to avoid this kind of error 2. Engine core is broken by https://github.com/vllm-project/vllm/pull/23691 We'll remove the patch file in the future. 3. Ascend scheduler is broken by https://github.com/vllm-project/vllm/pull/28733 We'll remove ascend scheudler later. 4. qwen3-next is broken by https://github.com/vllm-project/vllm/pull/28083 We'll remove model files in the future to avoid this kind of error 5. qwen vl is broken by https://github.com/vllm-project/vllm/pull/27764. We'll remove model files in the future Known issue: 1. ray doesn't work 2. the accuracy of qwen3-next is not correct 3. qwen3-vl is broken 4. 
prefix cache+ ascend scheduler + deepseek v2 lite is broken. Co-authored-by: MengqingCao <cmq0113@163.com> Co-authored-by: hfadzxy <starmoon_zhang@163.com> Co-authored-by: leo-pony <nengjunma@outlook.com> Co-authored-by: 22dimensions <waitingwind@foxmail.com> Co-authored-by: shen-shanshan <467638484@qq.com> - vLLM version: v0.11.2 --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Signed-off-by: hfadzxy <starmoon_zhang@163.com> Signed-off-by: leo-pony <nengjunma@outlook.com> Co-authored-by: MengqingCao <cmq0113@163.com> Co-authored-by: hfadzxy <starmoon_zhang@163.com> Co-authored-by: leo-pony <nengjunma@outlook.com> 2025-11-26 11:48:58 +08:00			`worker.parallel_config = MagicMock()`
			`worker.parallel_config.local_world_size = 0`
[bugfix] fix ray start failure: local_world_size cannot be less than visible device count error (#4457) ### What this PR does / why we need it? Fix the Ray start failure caused by the "local_world_size cannot be less than visible device count" error; for details see issue #4456. This fix is ported from the corresponding vLLM change, PR: [#28873](https://github.com/vllm-project/vllm/pull/28873) - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2 --------- Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-11-27 21:18:32 +08:00			`worker.parallel_config.data_parallel_size = 1`
[Test] Add ut test for torchair (#4287) ### What this PR does / why we need it? The current community lacks unit tests (UT) for files such as torchair_worker, mtp_proposer, and model_runner. Therefore, UT coverage for these files needs to be added. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 --------- Signed-off-by: CodeNine-CJ <chenjian343@huawei.com> 2025-11-21 16:33:34 +08:00
			`result = worker._init_device()`

			`mock_platform.set_device.assert_called_once()`
			`call_args = mock_platform.set_device.call_args[0][0]`
			`self.assertEqual(str(call_args), "npu:1")`

			`mock_platform.empty_cache.assert_called_once()`
			`mock_platform.seed_everything.assert_called_once_with(42)`
			`mock_platform.mem_get_info.assert_called_once()`
			`mock_init_dist_env.assert_called_once()`

			`self.assertEqual(str(result), "npu:1")`
			`self.assertEqual(worker.init_npu_memory, 1000)`