From 5897dc5bbe321ca90c26225d0d70bff24061d04b Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 6 May 2025 11:44:12 +0800 Subject: [PATCH] [Build] Bump vLLM version to v0.8.5.post1 (#755) ### What this PR does / why we need it? Bump the pinned vLLM version from v0.8.5 to v0.8.5.post1 in the CI test matrix and both Dockerfiles. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Signed-off-by: Yikun Jiang --- .github/workflows/vllm_ascend_test.yaml | 4 ++-- Dockerfile | 2 +- Dockerfile.openEuler | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index b49e527..6f78367 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -48,7 +48,7 @@ jobs: max-parallel: 2 matrix: os: [linux-arm64-npu-1, linux-arm64-npu-4] - vllm_verison: [main, v0.8.5] + vllm_verison: [main, v0.8.5.post1] concurrency: group: > ${{ @@ -153,7 +153,7 @@ jobs: - name: Run vllm-project/vllm-ascend Speculative Decode test # speculative decode seems will cause oom issue, only disable it now on ci test with vLLM main - if: matrix.vllm_verison == 'v0.8.5' && steps.filter_spec_decode.outputs.speculative_tests_changed == 'true' || github.event_name == 'schedule' + if: matrix.vllm_verison == 'v0.8.5.post1' && steps.filter_spec_decode.outputs.speculative_tests_changed == 'true' || github.event_name == 'schedule' run: | if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then pytest -sv tests/singlecard/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process diff --git a/Dockerfile b/Dockerfile index d01b988..3ca5431 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.8.5 +ARG VLLM_TAG=v0.8.5.post1 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. 
But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 3e3d127..272a399 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.8.5 +ARG VLLM_TAG=v0.8.5.post1 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.