From 0c1d239df43fdbe040df5c7c72c2c8b8c1614ee1 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 6 Jul 2025 10:42:27 +0800 Subject: [PATCH] Add unit test local cpu guide and enable base testcase (#1566) ### What this PR does / why we need it? Use Base test and clean up all manual patch code - Clean up EPLB config to avoid tmp test file - Use BaseTest with global cache - Add license - Add a doc to set up unit test in local env ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Signed-off-by: Yikun Jiang --- .../developer_guide/contribution/testing.md | 119 ++++++++++++++++-- tests/ut/attention/test_attention_v1.py | 2 - tests/ut/base.py | 19 +++ tests/ut/distributed/test_parallel_state.py | 19 ++- tests/ut/ops/expert_map.json | 17 +++ tests/ut/ops/test_expert_load_balancer.py | 56 ++++----- .../patch_common/test_patch_distributed.py | 15 +++ .../worker/patch_common/test_patch_sampler.py | 7 +- tests/ut/quantization/test_quant_config.py | 2 - tests/ut/quantization/test_quantizer.py | 2 - tests/ut/quantization/test_w8a8.py | 2 - tests/ut/test_ascend_config.py | 19 ++- tests/ut/test_utils.py | 18 ++- 13 files changed, 239 insertions(+), 58 deletions(-) create mode 100644 tests/ut/ops/expert_map.json diff --git a/docs/source/developer_guide/contribution/testing.md b/docs/source/developer_guide/contribution/testing.md index c7f413e..7ad5f1f 100644 --- a/docs/source/developer_guide/contribution/testing.md +++ b/docs/source/developer_guide/contribution/testing.md @@ -9,8 +9,52 @@ The fastest way to setup test environment is to use the main branch container im :::::{tab-set} :sync-group: e2e -::::{tab-item} Single card +::::{tab-item} Local (CPU) :selected: +:sync: cpu + +You can run the unit tests on CPU with the following steps: + +```{code-block} bash + :substitutions: + +cd ~/vllm-project/ +# ls +# vllm vllm-ascend + +# Use mirror to speedup download +# docker pull quay.nju.edu.cn/ascend/cann:|cann_image_tag| +export 
IMAGE=quay.io/ascend/cann:|cann_image_tag| +docker run --rm --name vllm-ascend-ut \ + -v $(pwd):/vllm-project \ + -v ~/.cache:/root/.cache \ + -ti $IMAGE bash + +# (Optional) Configure mirror to speedup download +sed -i 's|ports.ubuntu.com|mirrors.huaweicloud.com|g' /etc/apt/sources.list +pip config set global.index-url https://mirrors.huaweicloud.com/repository/pypi/simple/ + +# For torch-npu dev version or x86 machine +export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu/ https://mirrors.huaweicloud.com/ascend/repos/pypi" + +apt-get update -y +apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2 + +# Install vllm +cd /vllm-project/vllm +VLLM_TARGET_DEVICE=empty python3 -m pip -v install . + +# Install vllm-ascend +cd /vllm-project/vllm-ascend +# [IMPORTANT] Import LD_LIBRARY_PATH to enumerate the CANN environment under CPU +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -m)-linux/devlib +python3 -m pip install -r requirements-dev.txt +python3 -m pip install -v . 
+``` + +:::: + +::::{tab-item} Single card :sync: single ```{code-block} bash @@ -36,6 +80,16 @@ docker run --rm \ -it $IMAGE bash ``` +After starting the container, you should install the required packages: + +```bash +# Prepare +pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + +# Install required packages +pip install -r requirements-dev.txt +``` + :::: ::::{tab-item} Multi cards @@ -63,13 +117,12 @@ docker run --rm \ -p 8000:8000 \ -it $IMAGE bash ``` -:::: - -::::: After starting the container, you should install the required packages: ```bash +cd /vllm-workspace/vllm-ascend/ + # Prepare pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple @@ -77,6 +130,10 @@ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/si pip install -r requirements-dev.txt ``` +:::: + +::::: + ## Running tests ### Unit test @@ -89,14 +146,48 @@ There are several principles to follow when writing unit tests: - Example: [tests/ut/test_ascend_config.py](https://github.com/vllm-project/vllm-ascend/blob/main/tests/ut/test_ascend_config.py). 
- You can run the unit tests using `pytest`: - ```bash - cd /vllm-workspace/vllm-ascend/ - # Run all single card the tests - pytest -sv tests/ut +:::::{tab-set} +:sync-group: e2e - # Run - pytest -sv tests/ut/test_ascend_config.py - ``` +::::{tab-item} Local (CPU) +:selected: +:sync: cpu + +```bash +# Run unit tests +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -m)-linux/devlib +VLLM_USE_V1=1 TORCH_DEVICE_BACKEND_AUTOLOAD=0 pytest -sv tests/ut +``` + +:::: + +::::{tab-item} Single card +:sync: single + +```bash +cd /vllm-workspace/vllm-ascend/ +# Run all the single card tests +pytest -sv tests/ut + +# Run single test +pytest -sv tests/ut/test_ascend_config.py +``` +:::: + +::::{tab-item} Multi cards test +:sync: multi + +```bash +cd /vllm-workspace/vllm-ascend/ +# Run all the unit tests +pytest -sv tests/ut + +# Run single test +pytest -sv tests/ut/test_ascend_config.py +``` +:::: + +::::: ### E2E test @@ -106,6 +197,12 @@ locally. :::::{tab-set} :sync-group: e2e +::::{tab-item} Local (CPU) +:sync: cpu + +You can't run e2e tests on CPU. +:::: + ::::{tab-item} Single card :selected: :sync: single diff --git a/tests/ut/attention/test_attention_v1.py b/tests/ut/attention/test_attention_v1.py index 3e0242b..e9ce36e 100644 --- a/tests/ut/attention/test_attention_v1.py +++ b/tests/ut/attention/test_attention_v1.py @@ -1,5 +1,3 @@ -import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa - from unittest.mock import MagicMock, patch import torch diff --git a/tests/ut/base.py b/tests/ut/base.py index 99c6d5b..e34f175 100644 --- a/tests/ut/base.py +++ b/tests/ut/base.py @@ -1,7 +1,26 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + import unittest from vllm_ascend.utils import adapt_patch +# The fused moe ops test will hit the infer_schema error, so we need to add the patch +# here to make the test pass. +import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa + class TestBase(unittest.TestCase): diff --git a/tests/ut/distributed/test_parallel_state.py b/tests/ut/distributed/test_parallel_state.py index fa7cef7..b00eeb9 100644 --- a/tests/ut/distributed/test_parallel_state.py +++ b/tests/ut/distributed/test_parallel_state.py @@ -1,16 +1,31 @@ -import unittest +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. 
+# + from unittest.mock import MagicMock, patch import pytest from vllm.distributed.parallel_state import GroupCoordinator import vllm_ascend +from tests.ut.base import TestBase from vllm_ascend.distributed.parallel_state import ( destory_ascend_model_parallel, get_ep_group, get_etp_group, init_ascend_model_parallel, model_parallel_initialized) -class TestParallelState(unittest.TestCase): +class TestParallelState(TestBase): @patch('vllm_ascend.distributed.parallel_state._EP', new_callable=lambda: MagicMock(spec=GroupCoordinator)) diff --git a/tests/ut/ops/expert_map.json b/tests/ut/ops/expert_map.json new file mode 100644 index 0000000..bb74799 --- /dev/null +++ b/tests/ut/ops/expert_map.json @@ -0,0 +1,17 @@ +{ + "moe_layer_count": + 1, + "layer_list": [{ + "layer_id": + 0, + "device_count": + 2, + "device_list": [{ + "device_id": 0, + "device_expert": [7, 2, 0, 3, 5] + }, { + "device_id": 1, + "device_expert": [6, 1, 4, 7, 2] + }] + }] +} diff --git a/tests/ut/ops/test_expert_load_balancer.py b/tests/ut/ops/test_expert_load_balancer.py index 3b7a69d..97beada 100644 --- a/tests/ut/ops/test_expert_load_balancer.py +++ b/tests/ut/ops/test_expert_load_balancer.py @@ -1,14 +1,26 @@ -# fused moe ops test will hit the infer_schema error, we need add the patch -# here to make the test pass. -import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# This file is a part of the vllm-ascend project. +# import json -import unittest +import os from typing import List, TypedDict from unittest import mock import torch +from tests.ut.base import TestBase from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer @@ -28,31 +40,13 @@ class MockData(TypedDict): layer_list: List[Layer] -MOCK_DATA: MockData = { - "moe_layer_count": - 1, - "layer_list": [{ - "layer_id": - 0, - "device_count": - 2, - "device_list": [{ - "device_id": 0, - "device_expert": [7, 2, 0, 3, 5] - }, { - "device_id": 1, - "device_expert": [6, 1, 4, 7, 2] - }] - }] -} - - -class TestExpertLoadBalancer(unittest.TestCase): +class TestExpertLoadBalancer(TestBase): def setUp(self): - json_file = "expert_map.json" - with open(json_file, 'w') as f: - json.dump(MOCK_DATA, f) + _TEST_DIR = os.path.dirname(__file__) + json_file = _TEST_DIR + "/expert_map.json" + with open(json_file, 'r') as f: + self.expert_map: MockData = json.load(f) self.expert_load_balancer = ExpertLoadBalancer(json_file, global_expert_num=8) @@ -62,9 +56,9 @@ class TestExpertLoadBalancer(unittest.TestCase): self.assertIsInstance(self.expert_load_balancer.expert_map_tensor, torch.Tensor) self.assertEqual(self.expert_load_balancer.layers_num, - MOCK_DATA["moe_layer_count"]) + self.expert_map["moe_layer_count"]) self.assertEqual(self.expert_load_balancer.ranks_num, - MOCK_DATA["layer_list"][0]["device_count"]) + self.expert_map["layer_list"][0]["device_count"]) def test_generate_index_dicts(self): tensor_2d = torch.tensor([[7, 2, 0, 3, 5], [6, 1, 4, 7, 2]]) @@ -142,6 +136,6 @@ class TestExpertLoadBalancer(unittest.TestCase): def test_get_global_redundant_expert_num(self): redundant_expert_num = self.expert_load_balancer.get_global_redundant_expert_num( ) - expected_redundant_expert_num = len(MOCK_DATA["layer_list"][0]["device_list"][0]["device_expert"]) * \ - MOCK_DATA["layer_list"][0]["device_count"] - 8 + expected_redundant_expert_num = 
len(self.expert_map["layer_list"][0]["device_list"][0]["device_expert"]) * \ + self.expert_map["layer_list"][0]["device_count"] - 8 self.assertEqual(redundant_expert_num, expected_redundant_expert_num) diff --git a/tests/ut/patch/worker/patch_common/test_patch_distributed.py b/tests/ut/patch/worker/patch_common/test_patch_distributed.py index 1ddc614..73525ee 100644 --- a/tests/ut/patch/worker/patch_common/test_patch_distributed.py +++ b/tests/ut/patch/worker/patch_common/test_patch_distributed.py @@ -1,3 +1,18 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. 
+# + from tests.ut.base import TestBase diff --git a/tests/ut/patch/worker/patch_common/test_patch_sampler.py b/tests/ut/patch/worker/patch_common/test_patch_sampler.py index d882af7..b87175d 100644 --- a/tests/ut/patch/worker/patch_common/test_patch_sampler.py +++ b/tests/ut/patch/worker/patch_common/test_patch_sampler.py @@ -1,17 +1,20 @@ import importlib import os -import unittest from unittest import mock import torch from vllm.v1.sample.ops import topk_topp_sampler +from tests.ut.base import TestBase -class TestTopKTopPSamplerOptimize(unittest.TestCase): + +class TestTopKTopPSamplerOptimize(TestBase): @mock.patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"}) @mock.patch("torch_npu.npu_top_k_top_p") def test_npu_topk_topp_called_when_optimized(self, mock_npu_op): + # We have to patch and reload because the patch will take effect + # only after VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE is set. import vllm_ascend.patch.worker.patch_0_9_1.patch_sampler importlib.reload(vllm_ascend.patch.worker.patch_0_9_1.patch_sampler) diff --git a/tests/ut/quantization/test_quant_config.py b/tests/ut/quantization/test_quant_config.py index 3f20c01..6591d93 100644 --- a/tests/ut/quantization/test_quant_config.py +++ b/tests/ut/quantization/test_quant_config.py @@ -1,5 +1,3 @@ -import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa - from unittest.mock import MagicMock, patch import torch diff --git a/tests/ut/quantization/test_quantizer.py b/tests/ut/quantization/test_quantizer.py index a827364..559cf19 100644 --- a/tests/ut/quantization/test_quantizer.py +++ b/tests/ut/quantization/test_quantizer.py @@ -1,5 +1,3 @@ -import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa - from unittest.mock import MagicMock, patch from tests.ut.base import TestBase diff --git a/tests/ut/quantization/test_w8a8.py b/tests/ut/quantization/test_w8a8.py index 69faf10..392355a 100644 --- 
a/tests/ut/quantization/test_w8a8.py +++ b/tests/ut/quantization/test_w8a8.py @@ -1,5 +1,3 @@ -import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa - import unittest from unittest.mock import MagicMock, patch diff --git a/tests/ut/test_ascend_config.py b/tests/ut/test_ascend_config.py index 3d0cee8..a123790 100644 --- a/tests/ut/test_ascend_config.py +++ b/tests/ut/test_ascend_config.py @@ -1,17 +1,32 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + import os -import unittest from unittest import mock from transformers import PretrainedConfig from vllm.config import ModelConfig, VllmConfig +from tests.ut.base import TestBase from vllm_ascend.ascend_config import (_check_torchair_supported, check_ascend_config, clear_ascend_config, get_ascend_config, init_ascend_config) -class TestAscendConfig(unittest.TestCase): +class TestAscendConfig(TestBase): @staticmethod def _clean_up_ascend_config(func): diff --git a/tests/ut/test_utils.py b/tests/ut/test_utils.py index 577a7ab..bceeb0a 100644 --- a/tests/ut/test_utils.py +++ b/tests/ut/test_utils.py @@ -1,4 +1,17 @@ -import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# import math import os @@ -10,10 +23,11 @@ import torch from vllm.config import (CompilationConfig, ModelConfig, ParallelConfig, VllmConfig) +from tests.ut.base import TestBase from vllm_ascend import utils -class TestUtils(unittest.TestCase): +class TestUtils(TestBase): def test_is_310p(self): utils._IS_310P = None