diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index 2ba1fc7..c1b0e37 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -112,7 +112,7 @@ jobs: uses: actions/checkout@v4 with: repository: vllm-project/vllm - ref: v0.10.2 + ref: v0.11.0rc3 path: ./vllm-empty - name: Install vllm-project/vllm from source diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 993530b..2154349 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -51,7 +51,7 @@ jobs: strategy: matrix: include: - - vllm_branch: v0.10.2 + - vllm_branch: v0.11.0rc3 vllm_ascend_branch: main vllm_use_v1: 1 max-parallel: 1 diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml index b0b49a9..2a75318 100644 --- a/.github/workflows/vllm_ascend_dist.yaml +++ b/.github/workflows/vllm_ascend_dist.yaml @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [linux-aarch64-a3-8] - vllm_version: [v0.10.2] + vllm_version: [v0.11.0rc3] name: vLLM Ascend test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 7470694..a551051 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -83,7 +83,7 @@ jobs: VLLM_USE_MODELSCOPE: True strategy: matrix: - vllm_version: [releases/v0.11.0, v0.10.2] + vllm_version: [releases/v0.11.0, v0.11.0rc3] steps: - name: Install packages run: | @@ -138,7 +138,7 @@ jobs: name: e2e-light strategy: matrix: - vllm_version: [releases/v0.11.0, v0.10.2] + vllm_version: [releases/v0.11.0, v0.11.0rc3] # Note (yikun): If CI resource are limited we can split job into two chain jobs needs: [lint, changes] # only trigger e2e test after lint passed and the change is e2e related with pull request. diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml index c6afb6b..7c85f84 100644 --- a/.github/workflows/vllm_ascend_test_310p.yaml +++ b/.github/workflows/vllm_ascend_test_310p.yaml @@ -53,7 +53,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-310p-1, linux-aarch64-310p-4] - vllm_version: [v0.10.2] + vllm_version: [v0.11.0rc3] name: 310p e2e test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 5f5089d..1c9f4d1 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -68,7 +68,7 @@ jobs: name: e2e-full strategy: matrix: - vllm_version: [releases/v0.11.0, v0.10.2] + vllm_version: [releases/v0.11.0, v0.11.0rc3] needs: [changes] if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} uses: ./.github/workflows/_e2e_test.yaml diff --git a/Dockerfile b/Dockerfile index a0d2b5b..1d0b73b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2 +ARG VLLM_TAG=v0.11.0rc3 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p b/Dockerfile.310p index 402ae08..f5ec94f 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2 +ARG VLLM_TAG=v0.11.0rc3 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index 68db234..3e9a2da 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2 +ARG VLLM_TAG=v0.11.0rc3 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 8da3af0..de01698 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2 +ARG VLLM_TAG=v0.11.0rc3 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index 61c4507..cec4ab6 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2 +ARG VLLM_TAG=v0.11.0rc3 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index c78490c..14b6cce 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2 +ARG VLLM_TAG=v0.11.0rc3 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/docs/source/conf.py b/docs/source/conf.py index 11044ee..6444665 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,7 +77,7 @@ myst_substitutions = { # CANN image tag 'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11", # vllm version in ci - 'ci_vllm_version': 'v0.10.2', + 'ci_vllm_version': 'v0.11.0rc3', } # Add any paths that contain templates here, relative to this directory.