[CI] Update vLLM to v0.11.0 (#3315)
### What this PR does / why we need it? There are 3 steps to upgrade vllm-ascend to the newest vLLM. We'll create 3 PRs: - [x] Upgrade vLLM to v0.11.0 to make CI happy first. - [ ] Move deepseek v3.2 to the vLLM way. - [ ] Then we'll add a new PR to add vLLM main support. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
2
.github/workflows/accuracy_test.yaml
vendored
2
.github/workflows/accuracy_test.yaml
vendored
@@ -112,7 +112,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
repository: vllm-project/vllm
|
repository: vllm-project/vllm
|
||||||
ref: v0.11.0rc3
|
ref: v0.11.0
|
||||||
path: ./vllm-empty
|
path: ./vllm-empty
|
||||||
|
|
||||||
- name: Install vllm-project/vllm from source
|
- name: Install vllm-project/vllm from source
|
||||||
|
|||||||
2
.github/workflows/format_pr_body.yaml
vendored
2
.github/workflows/format_pr_body.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Get vLLM version
|
- name: Get vLLM version
|
||||||
run: |
|
run: |
|
||||||
VLLM_COMMIT=releases/v0.11.0
|
VLLM_COMMIT=v0.11.0
|
||||||
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|||||||
2
.github/workflows/nightly_benchmarks.yaml
vendored
2
.github/workflows/nightly_benchmarks.yaml
vendored
@@ -51,7 +51,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- vllm_branch: v0.11.0rc3
|
- vllm_branch: v0.11.0
|
||||||
vllm_ascend_branch: main
|
vllm_ascend_branch: main
|
||||||
vllm_use_v1: 1
|
vllm_use_v1: 1
|
||||||
max-parallel: 1
|
max-parallel: 1
|
||||||
|
|||||||
2
.github/workflows/vllm_ascend_dist.yaml
vendored
2
.github/workflows/vllm_ascend_dist.yaml
vendored
@@ -43,7 +43,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-aarch64-a3-8]
|
os: [linux-aarch64-a3-8]
|
||||||
vllm_version: [v0.11.0rc3]
|
vllm_version: [v0.11.0]
|
||||||
name: vLLM Ascend test
|
name: vLLM Ascend test
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
|
|||||||
6
.github/workflows/vllm_ascend_test.yaml
vendored
6
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -42,7 +42,7 @@ jobs:
|
|||||||
lint:
|
lint:
|
||||||
uses: ./.github/workflows/pre-commit.yml
|
uses: ./.github/workflows/pre-commit.yml
|
||||||
with:
|
with:
|
||||||
vllm: releases/v0.11.0
|
vllm: v0.11.0
|
||||||
|
|
||||||
changes:
|
changes:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -83,7 +83,7 @@ jobs:
|
|||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [releases/v0.11.0, v0.11.0rc3]
|
vllm_version: [v0.11.0]
|
||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: |
|
run: |
|
||||||
@@ -145,7 +145,7 @@ jobs:
|
|||||||
name: e2e-light
|
name: e2e-light
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [releases/v0.11.0, v0.11.0rc3]
|
vllm_version: [v0.11.0]
|
||||||
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
||||||
needs: [lint, changes]
|
needs: [lint, changes]
|
||||||
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
||||||
|
|||||||
2
.github/workflows/vllm_ascend_test_310p.yaml
vendored
2
.github/workflows/vllm_ascend_test_310p.yaml
vendored
@@ -53,7 +53,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
|
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
|
||||||
vllm_version: [v0.11.0rc3]
|
vllm_version: [v0.11.0]
|
||||||
name: 310p e2e test
|
name: 310p e2e test
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
|
|||||||
2
.github/workflows/vllm_ascend_test_full.yaml
vendored
2
.github/workflows/vllm_ascend_test_full.yaml
vendored
@@ -68,7 +68,7 @@ jobs:
|
|||||||
name: e2e-full
|
name: e2e-full
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [releases/v0.11.0, v0.11.0rc3]
|
vllm_version: [v0.11.0]
|
||||||
needs: [changes]
|
needs: [changes]
|
||||||
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
||||||
uses: ./.github/workflows/_e2e_test.yaml
|
uses: ./.github/workflows/_e2e_test.yaml
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ jobs:
|
|||||||
e2e-test:
|
e2e-test:
|
||||||
uses: ./.github/workflows/_e2e_test.yaml
|
uses: ./.github/workflows/_e2e_test.yaml
|
||||||
with:
|
with:
|
||||||
vllm: releases/v0.11.0
|
vllm: v0.11.0
|
||||||
runner: linux-aarch64-a2
|
runner: linux-aarch64-a2
|
||||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
||||||
type: full
|
type: full
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.11.0rc3
|
ARG VLLM_TAG=v0.11.0
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.11.0rc3
|
ARG VLLM_TAG=v0.11.0
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.11.0rc3
|
ARG VLLM_TAG=v0.11.0
|
||||||
|
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.11.0rc3
|
ARG VLLM_TAG=v0.11.0
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.11.0rc3
|
ARG VLLM_TAG=v0.11.0
|
||||||
|
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.11.0rc3
|
ARG VLLM_TAG=v0.11.0
|
||||||
|
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
|
|||||||
@@ -422,7 +422,10 @@ class CustomDeepseekV2SFAAttention(DeepseekV2MLAAttention):
|
|||||||
|
|
||||||
class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
|
class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
|
||||||
|
|
||||||
def __init__(self, vllm_config: VllmConfig, prefix: str) -> None:
|
def __init__(self,
|
||||||
|
vllm_config: VllmConfig,
|
||||||
|
prefix: str,
|
||||||
|
topk_indices_buffer=None) -> None:
|
||||||
nn.Module.__init__(self)
|
nn.Module.__init__(self)
|
||||||
config = vllm_config.model_config.hf_config
|
config = vllm_config.model_config.hf_config
|
||||||
model_config = vllm_config.model_config
|
model_config = vllm_config.model_config
|
||||||
|
|||||||
Reference in New Issue
Block a user