Upgrade vLLM version to v0.9.2 (#1652)
### What this PR does / why we need it?
This patch upgrades the vLLM version to v0.9.2. It intentionally does not remove the
v0.9.1-compatible code, to make the change easier to review.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.9.1
- vLLM main:
14601f5fba
- Accuracy test with 0.9.2:
https://github.com/vllm-project/vllm-ascend/actions/runs/16121612087
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
This commit is contained in:
3
.github/workflows/accuracy_test.yaml
vendored
3
.github/workflows/accuracy_test.yaml
vendored
@@ -37,6 +37,7 @@ on:
|
|||||||
# Current supported vLLM versions
|
# Current supported vLLM versions
|
||||||
options:
|
options:
|
||||||
- main
|
- main
|
||||||
|
- v0.9.2
|
||||||
- v0.9.1
|
- v0.9.1
|
||||||
- v0.7.3
|
- v0.7.3
|
||||||
vllm-ascend-version:
|
vllm-ascend-version:
|
||||||
@@ -163,7 +164,7 @@ jobs:
|
|||||||
repository: vllm-project/vllm
|
repository: vllm-project/vllm
|
||||||
path: ./vllm-empty
|
path: ./vllm-empty
|
||||||
# Please also update this when bump matched version
|
# Please also update this when bump matched version
|
||||||
ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}
|
ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
|
||||||
|
|
||||||
- name: Install vllm-project/vllm from source
|
- name: Install vllm-project/vllm from source
|
||||||
working-directory: ./vllm-empty
|
working-directory: ./vllm-empty
|
||||||
|
|||||||
2
.github/workflows/nightly_benchmarks.yaml
vendored
2
.github/workflows/nightly_benchmarks.yaml
vendored
@@ -50,7 +50,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- vllm_branch: v0.9.1
|
- vllm_branch: v0.9.2
|
||||||
vllm_ascend_branch: main
|
vllm_ascend_branch: main
|
||||||
vllm_use_v1: 1
|
vllm_use_v1: 1
|
||||||
max-parallel: 1
|
max-parallel: 1
|
||||||
|
|||||||
8
.github/workflows/vllm_ascend_test.yaml
vendored
8
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -138,13 +138,13 @@ jobs:
|
|||||||
if: ${{ needs.lint.result == 'success' || github.event_name == 'push' }}
|
if: ${{ needs.lint.result == 'success' || github.event_name == 'push' }}
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container:
|
container:
|
||||||
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
||||||
env:
|
env:
|
||||||
VLLM_LOGGING_LEVEL: ERROR
|
VLLM_LOGGING_LEVEL: ERROR
|
||||||
VLLM_USE_MODELSCOPE: True
|
VLLM_USE_MODELSCOPE: True
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
vllm_version: [main, v0.9.1]
|
vllm_version: [main, v0.9.2]
|
||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: |
|
run: |
|
||||||
@@ -201,7 +201,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-arm64-npu-1]
|
os: [linux-arm64-npu-1]
|
||||||
vllm_version: [main, v0.9.1]
|
vllm_version: [main, v0.9.2]
|
||||||
name: singlecard e2e test
|
name: singlecard e2e test
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
@@ -302,7 +302,7 @@ jobs:
|
|||||||
max-parallel: 1
|
max-parallel: 1
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-arm64-npu-4]
|
os: [linux-arm64-npu-4]
|
||||||
vllm_version: [main, v0.9.1]
|
vllm_version: [main, v0.9.2]
|
||||||
name: multicard e2e test
|
name: multicard e2e test
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
os: [linux-arm64-npu-1, linux-arm64-npu-4]
|
os: [linux-arm64-npu-1, linux-arm64-npu-4]
|
||||||
vllm_version: [main, v0.9.1]
|
vllm_version: [main, v0.9.2]
|
||||||
name: vLLM Ascend long term test
|
name: vLLM Ascend long term test
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
container:
|
container:
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.9.1
|
ARG VLLM_TAG=v0.9.2
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.9.1
|
ARG VLLM_TAG=v0.9.2
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.9.1
|
ARG VLLM_TAG=v0.9.2
|
||||||
|
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|||||||
|
|
||||||
# Install vLLM
|
# Install vLLM
|
||||||
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
|
||||||
ARG VLLM_TAG=v0.9.1
|
ARG VLLM_TAG=v0.9.2
|
||||||
|
|
||||||
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
|
||||||
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
|
||||||
|
|||||||
@@ -74,8 +74,8 @@ Usually, each minor version of vLLM (such as 0.7) will correspond to a vLLM Asce
|
|||||||
|
|
||||||
| Branch | Status | Note |
|
| Branch | Status | Note |
|
||||||
|------------|--------------|--------------------------------------|
|
|------------|--------------|--------------------------------------|
|
||||||
| main | Maintained | CI commitment for vLLM main branch and vLLM 0.9.x branch |
|
| main | Maintained | CI commitment for vLLM main branch and vLLM 0.9.2 branch |
|
||||||
| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.0 and 0.9.1 version |
|
| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.1 version |
|
||||||
| v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version |
|
| v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version |
|
||||||
| v0.7.1-dev | Unmaintained | Replaced by v0.7.3-dev |
|
| v0.7.1-dev | Unmaintained | Replaced by v0.7.3-dev |
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ From v0.9.1rc1 with V1 Engine, vLLM Ascend will run models in graph mode by defa
|
|||||||
|
|
||||||
There are two kinds for graph mode supported by vLLM Ascend:
|
There are two kinds for graph mode supported by vLLM Ascend:
|
||||||
- **ACLGraph**: This is the default graph mode supported by vLLM Ascend. In v0.9.1rc1, only Qwen series models are well tested.
|
- **ACLGraph**: This is the default graph mode supported by vLLM Ascend. In v0.9.1rc1, only Qwen series models are well tested.
|
||||||
- **TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek series models are supported. In v0.9.1rc2, we also support PanguProMoe with torchair.
|
- **TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek series models are supported.
|
||||||
|
|
||||||
## Using ACLGraph
|
## Using ACLGraph
|
||||||
ACLGraph is enabled by default. Take Qwen series models as an example, just set to use V1 Engine is enough.
|
ACLGraph is enabled by default. Take Qwen series models as an example, just set to use V1 Engine is enough.
|
||||||
|
|||||||
Reference in New Issue
Block a user