From 693f547ccf01194f291b30cf2923d220e4a44dd9 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 21 Sep 2025 13:27:08 +0800 Subject: [PATCH] Refactor ci to reuse base workflow and re-enable ut coverage (#3064) ### What this PR does / why we need it? 1. Refactor ci to reuse base workflow and enable main 2 hours trigger job: - Extract e2e test in to _e2e_test.yaml - Reuse _e2e_test in light / full job - Enable main 2 hours trigger job 2. Rename e2e test to ascend test to make sure action display label 3. Re-enable ut coverage which was failed since https://github.com/vllm-project/vllm-ascend/commit/5bcb4c1528311b8f20c91a9239ac38172f002fcb and disable on https://github.com/vllm-project/vllm-ascend/commit/6d8bc38c7b0faba0b666585a09b7523c7714b9cc ### Does this PR introduce _any_ user-facing change? Only developer behavior changes: - Every job trigger full test with vllm release and hash - Run full job per 2 hours with vllm main - e2e light test (30 mins): `lint` (6mins) ---> ut (10mins) ---> `v0.10.2 + main / 4 jobs` (15mins) - e2e full test (1.5h): `ready label` ---> `v0.10.2 + main / 4 jobs`, about 1.5h - schedule test: 2hours ---> `v0.10.2 + main / 4 jobs`, about 1.5h ### How was this patch tested? 
CI passed - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/c60e6137f0bf2034853919b3a9d705d7e06b93cf Signed-off-by: Yikun Jiang --- .github/workflows/_e2e_test.yaml | 192 ++++++++++++++++++ .github/workflows/accuracy_test.yaml | 2 +- .github/workflows/nightly_benchmarks.yaml | 2 +- .github/workflows/vllm_ascend_doctest.yaml | 2 +- .github/workflows/vllm_ascend_test.yaml | 141 ++----------- .github/workflows/vllm_ascend_test_full.yaml | 176 +--------------- .../vllm_ascend_test_full_vllm_main.yaml | 45 ++++ 7 files changed, 264 insertions(+), 296 deletions(-) create mode 100644 .github/workflows/_e2e_test.yaml create mode 100644 .github/workflows/vllm_ascend_test_full_vllm_main.yaml diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml new file mode 100644 index 0000000..7f8df34 --- /dev/null +++ b/.github/workflows/_e2e_test.yaml @@ -0,0 +1,192 @@ +name: 'e2e test' + +on: + workflow_call: + inputs: + vllm: + required: true + type: string + runner: + required: true + type: string + image: + required: true + type: string + type: + required: true + type: string + +jobs: + e2e: + name: singlecard + runs-on: ${{ inputs.runner }}-1 + container: + image: ${{ inputs.image }} + env: + VLLM_LOGGING_LEVEL: ERROR + VLLM_USE_MODELSCOPE: True + steps: + - name: Check npu and CANN info + run: | + npu-smi info + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + apt-get update -y + apt install git -y + + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v4 + + - name: Install system dependencies + run: | + apt-get -y 
install `cat packages.txt` + apt-get -y install gcc g++ cmake libnuma-dev + + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v4 + with: + repository: vllm-project/vllm + ref: ${{ inputs.vllm }} + path: ./vllm-empty + fetch-depth: 1 + + - name: Install vllm-project/vllm from source + working-directory: ./vllm-empty + run: | + VLLM_TARGET_DEVICE=empty pip install -e . + + - name: Install vllm-project/vllm-ascend + env: + PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi + run: | + pip install -r requirements-dev.txt + pip install -v -e . + + - name: Run vllm-project/vllm-ascend test + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + VLLM_USE_MODELSCOPE: True + if: ${{ inputs.type == 'light' }} + run: | + pytest -sv tests/e2e/singlecard/test_aclgraph.py + pytest -sv tests/e2e/singlecard/test_quantization.py + pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl + + - name: Run e2e test + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + VLLM_USE_MODELSCOPE: True + if: ${{ inputs.type == 'full' }} + run: | + # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run + # the test separately. 
+ + pytest -sv tests/e2e/singlecard/test_aclgraph.py + pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py + pytest -sv tests/e2e/singlecard/test_camem.py + pytest -sv tests/e2e/singlecard/test_chunked.py + pytest -sv tests/e2e/singlecard/test_embedding.py + pytest -sv tests/e2e/singlecard/test_guided_decoding.py + #pytest -sv tests/e2e/singlecard/test_ilama_lora.py + pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py + pytest -sv tests/e2e/singlecard/test_quantization.py + pytest -sv tests/e2e/singlecard/test_sampler.py + pytest -sv tests/e2e/singlecard/test_vlm.py + + # ------------------------------------ v1 spec decode test ------------------------------------ # + pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py + pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py + pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py + + pytest -sv tests/e2e/singlecard/ops/ + + e2e-2-cards: + name: multicard + runs-on: ${{ inputs.runner }}-2 + container: + image: ${{ inputs.image }} + env: + VLLM_LOGGING_LEVEL: ERROR + VLLM_USE_MODELSCOPE: True + steps: + - name: Check npu and CANN info + run: | + npu-smi info + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + apt-get update -y + apt install git -y + + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v4 + + - name: Install system dependencies + run: | + apt-get -y install `cat packages.txt` + apt-get -y install gcc g++ cmake libnuma-dev + + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v4 + with: + repository: 
vllm-project/vllm + ref: ${{ inputs.vllm }} + path: ./vllm-empty + fetch-depth: 1 + + - name: Install vllm-project/vllm from source + working-directory: ./vllm-empty + run: | + VLLM_TARGET_DEVICE=empty pip install -e . + + - name: Install vllm-project/vllm-ascend + env: + PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi + run: | + pip install -r requirements-dev.txt + pip install -v -e . + + - name: Run vllm-project/vllm-ascend test (light) + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + VLLM_USE_MODELSCOPE: True + if: ${{ inputs.type == 'light' }} + run: | + pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP + + - name: Run vllm-project/vllm-ascend test (full) + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + VLLM_USE_MODELSCOPE: True + if: ${{ inputs.type == 'full' }} + run: | + pytest -sv tests/e2e/multicard/test_data_parallel.py + pytest -sv tests/e2e/multicard/test_expert_parallel.py + # external_launcher test is not stable enough. Fix it later + # pytest -sv tests/e2e/multicard/test_external_launcher.py + pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py + #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py + + # To avoid oom, we need to run the test in a single process. 
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8 + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1 + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight + + #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py + pytest -sv tests/e2e/multicard/test_prefix_caching.py + pytest -sv tests/e2e/multicard/test_qwen3_moe.py + pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index c2d4250..2ba1fc7 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -19,7 +19,7 @@ # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test' # 2. workflow_dispatch with models input # See detail rule in strategy.matrix note -name: Benchmarks / accuracy +name: ascend test / accuracy on: schedule: diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 0b2e84c..993530b 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -15,7 +15,7 @@ # limitations under the License. 
# -name: 'Benchmarks / Performance' +name: 'ascend test / performance' # This workflow runs nightly benchmarks for vllm-ascend. on: diff --git a/.github/workflows/vllm_ascend_doctest.yaml b/.github/workflows/vllm_ascend_doctest.yaml index 1b4faea..641c048 100644 --- a/.github/workflows/vllm_ascend_doctest.yaml +++ b/.github/workflows/vllm_ascend_doctest.yaml @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -name: 'e2e test / doctest' +name: 'ascend test / doctest' on: workflow_dispatch: diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index c406907..51b136b 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -name: 'test' +name: 'ascend test' on: push: @@ -43,7 +43,6 @@ jobs: uses: ./.github/workflows/pre-commit.yml changes: - if: github.event_name == 'pull_request' runs-on: ubuntu-latest outputs: e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }} @@ -123,7 +122,8 @@ jobs: --ignore=tests/ut/patch/worker/patch_common/test_patch_minicpm.py - name: Upload coverage to Codecov - if: ${{ matrix.vllm_version != 'v0.10.2' }} + # only upload coverage when commits merged + if: github.event_name == 'push' && github.ref == 'refs/heads/main' uses: codecov/codecov-action@v5 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} @@ -133,132 +133,17 @@ jobs: verbose: true e2e-light: + name: e2e-light + strategy: + matrix: + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] + # Note (yikun): If CI resource are limited we can split job into two chain jobs needs: [lint, changes] # only trigger e2e test after lint passed and the change is e2e related with pull request. 
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }} - strategy: - max-parallel: 2 - matrix: - os: [linux-aarch64-a2-1] - vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] - name: singlecard e2e test - light - runs-on: ${{ matrix.os }} - container: + uses: ./.github/workflows/_e2e_test.yaml + with: + vllm: ${{ matrix.vllm_version }} + runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - steps: - - name: Check npu and CANN info - run: | - npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ matrix.vllm_version }} - path: ./vllm-empty - fetch-depth: 1 - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . 
- - - name: Run e2e test - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - run: | - pytest -sv tests/e2e/singlecard/test_aclgraph.py - pytest -sv tests/e2e/singlecard/test_quantization.py - pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl - - e2e-2-cards-light: - needs: [e2e-light] - if: ${{ needs.e2e-light.result == 'success' }} - strategy: - max-parallel: 2 - matrix: - os: [linux-aarch64-a2-2] - vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] - name: multicard e2e test - light - runs-on: ${{ matrix.os }} - container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - steps: - - name: Check npu and CANN info - run: | - npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ matrix.vllm_version }} - path: ./vllm-empty - fetch-depth: 1 - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . 
- - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . - - - name: Run vllm-project/vllm-ascend test - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - run: | - pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP \ No newline at end of file + type: light diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 0c389a5..e23f640 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -14,7 +14,7 @@ # limitations under the License. # This file is a part of the vllm-ascend project. # -name: 'test-full' +name: 'ascend test / full' on: pull_request: @@ -64,170 +64,16 @@ jobs: ut_tracker: - 'tests/ut/**' - e2e-full: - # only trigger full test when pull request is approved + e2e-test: + name: e2e-full + strategy: + matrix: + vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] needs: [changes] if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} - strategy: - max-parallel: 2 - matrix: - os: [linux-aarch64-a2-1] - vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] - name: singlecard e2e test - full - runs-on: ${{ matrix.os }} - container: + uses: ./.github/workflows/_e2e_test.yaml + with: + vllm: ${{ matrix.vllm_version }} + runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - steps: - - name: Check npu and CANN info - run: | - npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config 
set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ matrix.vllm_version }} - path: ./vllm-empty - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . - - - name: Run e2e test - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - run: | - # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run - # the test separately. 
- - pytest -sv tests/e2e/singlecard/test_aclgraph.py - pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py - pytest -sv tests/e2e/singlecard/test_camem.py - pytest -sv tests/e2e/singlecard/test_chunked.py - pytest -sv tests/e2e/singlecard/test_embedding.py - pytest -sv tests/e2e/singlecard/test_guided_decoding.py - #pytest -sv tests/e2e/singlecard/test_ilama_lora.py - pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py - pytest -sv tests/e2e/singlecard/test_quantization.py - pytest -sv tests/e2e/singlecard/test_sampler.py - pytest -sv tests/e2e/singlecard/test_vlm.py - - # ------------------------------------ v1 spec decode test ------------------------------------ # - pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py - pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py - pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py - - pytest -sv tests/e2e/singlecard/ops/ - - e2e-2-cards-full: - # only trigger full test when pull request is approved - needs: [changes] - if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} - strategy: - max-parallel: 2 - matrix: - os: [linux-aarch64-a2-2] - vllm_version: [c60e6137f0bf2034853919b3a9d705d7e06b93cf, v0.10.2] - name: multicard e2e test - full - runs-on: ${{ matrix.os }} - container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - steps: - - name: Check npu and CANN info - run: | - npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host 
cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ matrix.vllm_version }} - path: ./vllm-empty - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . - - - name: Run vllm-project/vllm-ascend test - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - run: | - #pytest -sv tests/e2e/multicard/test_data_parallel.py - pytest -sv tests/e2e/multicard/test_expert_parallel.py - # external_launcher test is not stable enough. Fix it later - # pytest -sv tests/e2e/multicard/test_external_launcher.py - pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py - #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py - - # To avoid oom, we need to run the test in a single process. 
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8 - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1 - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight - - #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py - pytest -sv tests/e2e/multicard/test_prefix_caching.py - pytest -sv tests/e2e/multicard/test_qwen3_moe.py - pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py + type: full diff --git a/.github/workflows/vllm_ascend_test_full_vllm_main.yaml b/.github/workflows/vllm_ascend_test_full_vllm_main.yaml new file mode 100644 index 0000000..48dc695 --- /dev/null +++ b/.github/workflows/vllm_ascend_test_full_vllm_main.yaml @@ -0,0 +1,45 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# +name: 'ascend test / vllm main' + +on: + # Run 1-card and 2-cards e2e tests per 2h + schedule: + - cron: '0 */2 * * *' + workflow_dispatch: + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +# only cancel in-progress runs of the same workflow +# and ignore the lint / 1 card / 4 cards test type +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + e2e-test: + uses: ./.github/workflows/_e2e_test.yaml + with: + vllm: main + runner: linux-aarch64-a2 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 + type: full