[CI] Add unit test framework (#1201)

This PR adds a unit test framework to enable UT for vLLM Ascend. Unit tests run on CPU machines. They run once the lint check has passed, the same as the e2e tests. For unit tests, this PR creates a new folder called `ut` under the `tests` module. The test files in `ut` should mirror the layout of the code in `vllm-ascend`, and each file name should start with the `test_` prefix. For example, in this PR, `test_ascend_config.py` is added to test `ascend_config.py`. A new file, `worker/test_worker_v1.py`, is also added as a placeholder; it should hold the unit tests for `vllm-ascend/worker/worker_v1.py`. Additionally, a new `fake_weight` folder is added; it contains the config.json from `facebook/opt-125m`, so that the tests do not always need to access Hugging Face. TODO: We should add the remaining unit test files one by one in the future. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-06-16 18:32:28 +08:00
parent 966557a2a3
commit 69b817ed65
57 changed files with 396 additions and 267 deletions
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-name: 'e2e test / basic'
+name: 'test'

 on:
  schedule:
@@ -114,6 +114,56 @@ jobs:
          echo "::add-matcher::.github/workflows/matchers/mypy.json"
          tools/mypy.sh 1 ${{ matrix.python-version }}

+  ut:
+    needs: [lint]
+    name: unit test
+    if: ${{ needs.lint.result == 'success' }}
+    runs-on: ubuntu-latest
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    strategy:
+      matrix:
+        vllm_version: [main, v0.9.1]
+    steps:
+      - name: Install packages
+        run: |
+          apt-get update -y
+          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip uninstall -y triton
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
+
+      - name: Run unit test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          pytest -sv tests/ut
+
  e2e:
    needs: [lint]
    if: ${{ needs.lint.result == 'success' }}
@@ -122,7 +172,7 @@ jobs:
      matrix:
        os: [linux-arm64-npu-1]
        vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: singlecard e2e test
    runs-on: ${{ matrix.os }}
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -168,53 +218,47 @@ jobs:
          pip install -r requirements-dev.txt
          pip install -v -e .

-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run e2e test for V1 Engine
        env:
          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-            pytest -sv tests/singlecard/test_ilama_lora.py
+            pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          # TODO(sss): guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_camem.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_camem.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py

-      - name: Run vllm-project/vllm-ascend test on V0 engine
+      - name: Run e2e test on V0 engine
        if: ${{ github.event_name == 'schedule' }}
        env:
          VLLM_USE_V1: 0
          VLLM_USE_MODELSCOPE: True
        run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-            pytest -sv tests/singlecard/test_ilama_lora.py
+            pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          # guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          pytest -sv tests/singlecard/test_camem.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_prompt_embedding.py
-          pytest -sv tests/singlecard/ \
-            --ignore=tests/singlecard/test_offline_inference.py \
-            --ignore=tests/singlecard/test_ilama_lora.py \
-            --ignore=tests/singlecard/test_guided_decoding.py \
-            --ignore=tests/singlecard/test_camem.py \
-            --ignore=tests/singlecard/test_ascend_config.py \
-            --ignore=tests/singlecard/test_prompt_embedding.py \
-            --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-            --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
+          pytest -sv tests/e2e/singlecard/ \
+            --ignore=tests/e2e/singlecard/test_offline_inference.py \
+            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+            --ignore=tests/e2e/singlecard/test_camem.py \
+            --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
+            --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+            --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py

  e2e-4-cards:
    needs: [e2e]
@@ -224,7 +268,7 @@ jobs:
      matrix:
        os: [linux-arm64-npu-4]
        vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: multicard e2e test
    runs-on: ${{ matrix.os }}
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -279,14 +323,14 @@ jobs:
        run: |
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-            pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+            pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py

      - name: Run vllm-project/vllm-ascend test on V0 engine
        if: ${{ github.event_name == 'schedule' }}
@@ -296,11 +340,11 @@ jobs:
        run: |
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-            pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+            pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
--- a/.github/workflows/vllm_ascend_test_long_term.yaml
+++ b/.github/workflows/vllm_ascend_test_long_term.yaml
@@ -96,12 +96,12 @@ jobs:
        run: |
          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
            # spec decode test
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
            # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
-            # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py  # it needs a clean process
-            pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
-            pytest -sv tests/long_term/test_accuracy.py
+            # VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py  # it needs a clean process
+            pytest -sv tests/e2e/long_term/spec_decode --ignore=tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            pytest -sv tests/e2e/long_term/test_accuracy.py
          else
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py
          fi