[CI] fix ci (#2464)

### What this PR does / why we need it?

1. Use actions/checkout@v4 instead of v5.
2. Remove the DeepSeek dbo test case because there is a known issue with it; it will be refactored later.
3. Make vllm-ascend compatible with vllm v0.10.1.1 and add CI for it.
4. Fix the sampler API changes introduced by https://github.com/vllm-project/vllm/pull/22387
5. Fix the Qwen3 MoE config changes introduced by https://github.com/vllm-project/vllm/pull/20562
6. Fix the kvcache block changes introduced by https://github.com/vllm-project/vllm/pull/23262

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

CI passed with existing tests.

- vLLM version: v0.10.0
- vLLM main: 0c6e40bbaa

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
2025-08-22 07:30:48 +08:00
parent 0ca3f48c90
commit b0403f8d8a
27 changed files with 389 additions and 199 deletions
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -81,7 +81,7 @@ jobs:
        VLLM_USE_MODELSCOPE: True
    strategy:
      matrix:
-        vllm_version: [main]
+        vllm_version: [v0.10.1.1, main]
    steps:
      - name: Install packages
        run: |
@@ -89,7 +89,7 @@ jobs:
          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2

      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v5
+        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
@@ -102,7 +102,7 @@ jobs:
          python3 -m pip uninstall -y triton

      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v5
+        uses: actions/checkout@v4

      - name: Install vllm-project/vllm-ascend
        run: |
@@ -137,7 +137,7 @@ jobs:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-1]
-        vllm_version: [main]
+        vllm_version: [v0.10.1.1, main]
    name: singlecard e2e test
    runs-on: ${{ matrix.os }}
    container:
@@ -219,7 +219,7 @@ jobs:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-2]
-        vllm_version: [main]
+        vllm_version: [v0.10.1.1, main]
    name: multicard e2e test
    runs-on: ${{ matrix.os }}
    container:
@@ -278,7 +278,6 @@ jobs:
          # To avoid oom, we need to run the test in a single process.
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_alltoallv
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC