[CI] Add daily images build for nightly ci (#3989)
### What this PR does / why we need it?
Because our nightly-ci build currently takes excessively long, this PR
pre-installs the necessary packages, at confirmed versions, in the
Docker image to reduce the time required for integration testing.
This includes Mooncake and vllm at fixed tags. The change is expected to
reduce the nightly-ci duration by 2 hours.
- vLLM version: v0.11.0
- vLLM main:
2918c1b49c
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
44
.github/Dockerfile.nightly.a2
vendored
Normal file
44
.github/Dockerfile.nightly.a2
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
# Nightly-CI image for the a2 target: layers the test requirements and the
# AISBench benchmark tool on top of the vllm-ascend image so that nightly jobs
# do not have to install them on every run.
FROM quay.io/ascend/vllm-ascend:main

# Build-time knobs: the PyPI mirror, and the AISBench tag and repository to clone.
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG AIS_BENCH_TAG="v3.0-20250930-master"
ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"

# Define environments
ENV DEBIAN_FRONTEND=noninteractive
# NOTE(review): this file declares no ARG COMPILE_CUSTOM_KERNELS, so the value
# presumably comes from an ENV exported by the base image (and is empty if the
# base image does not export one) -- confirm this re-export is intended.
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}

WORKDIR /workspace

# Quote the URL so an overridden --build-arg value cannot be word-split, and use
# `python3 -m pip` everywhere so every pip call targets the same interpreter.
RUN python3 -m pip config set global.index-url "${PIP_INDEX_URL}"

# Install requirements-dev.txt for tests
# The extra index is exported only for this RUN; it is not persisted in the image env.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
    cd /vllm-workspace/vllm-ascend && \
    python3 -m pip install -r requirements-dev.txt && \
    python3 -m pip cache purge

# Install benchmark tools
# Shallow-clone the requested AISBench tag, then install it in editable mode
# together with its API and extra requirement sets.
RUN git clone -b "${AIS_BENCH_TAG}" --depth 1 "${AIS_BENCH_URL}" /vllm-workspace/vllm-ascend/benchmark && \
    cd /vllm-workspace/vllm-ascend/benchmark && \
    python3 -m pip install -e . -r requirements/api.txt -r requirements/extra.txt && \
    python3 -m pip cache purge

CMD ["/bin/bash"]
|
||||
44
.github/Dockerfile.nightly.a3
vendored
Normal file
44
.github/Dockerfile.nightly.a3
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
# Nightly-CI image for the a3 target: layers the test requirements and the
# AISBench benchmark tool on top of the vllm-ascend a3 image so that nightly
# jobs do not have to install them on every run.
FROM quay.io/ascend/vllm-ascend:main-a3

# Build-time knobs: the PyPI mirror, and the AISBench tag and repository to clone.
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG AIS_BENCH_TAG="v3.0-20250930-master"
ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"

# Define environments
ENV DEBIAN_FRONTEND=noninteractive
# NOTE(review): this file declares no ARG COMPILE_CUSTOM_KERNELS, so the value
# presumably comes from an ENV exported by the base image (and is empty if the
# base image does not export one) -- confirm this re-export is intended.
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}

WORKDIR /workspace

# Quote the URL so an overridden --build-arg value cannot be word-split, and use
# `python3 -m pip` everywhere so every pip call targets the same interpreter.
RUN python3 -m pip config set global.index-url "${PIP_INDEX_URL}"

# Install requirements-dev.txt for tests
# The extra index is exported only for this RUN; it is not persisted in the image env.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
    cd /vllm-workspace/vllm-ascend && \
    python3 -m pip install -r requirements-dev.txt && \
    python3 -m pip cache purge

# Install benchmark tools
# Shallow-clone the requested AISBench tag, then install it in editable mode
# together with its API and extra requirement sets.
RUN git clone -b "${AIS_BENCH_TAG}" --depth 1 "${AIS_BENCH_URL}" /vllm-workspace/vllm-ascend/benchmark && \
    cd /vllm-workspace/vllm-ascend/benchmark && \
    python3 -m pip install -e . -r requirements/api.txt -r requirements/extra.txt && \
    python3 -m pip cache purge

CMD ["/bin/bash"]
|
||||
95
.github/workflows/_e2e_nightly_single_node.yaml
vendored
95
.github/workflows/_e2e_nightly_single_node.yaml
vendored
@@ -62,67 +62,56 @@ jobs:
|
||||
npu-smi info
|
||||
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
||||
|
||||
- name: Config mirrors
|
||||
- name: Show vLLM and vLLM-Ascend version
|
||||
working-directory: /vllm-workspace
|
||||
run: |
|
||||
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
||||
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
||||
apt-get update -y
|
||||
apt install git -y
|
||||
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
|
||||
echo "Installed vLLM-related Python packages:"
|
||||
pip list | grep vllm || echo "No vllm packages found."
|
||||
|
||||
- name: Checkout vllm-project/vllm-ascend repo
|
||||
uses: actions/checkout@v4
|
||||
echo ""
|
||||
echo "============================"
|
||||
echo "vLLM Git information"
|
||||
echo "============================"
|
||||
cd vllm
|
||||
if [ -d .git ]; then
|
||||
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
|
||||
echo "Commit hash: $(git rev-parse HEAD)"
|
||||
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
|
||||
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
|
||||
echo "Message: $(git log -1 --pretty=format:'%s')"
|
||||
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
|
||||
echo "Remote: $(git remote -v | head -n1)"
|
||||
echo ""
|
||||
else
|
||||
echo "No .git directory found in vllm"
|
||||
fi
|
||||
cd ..
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get -y install `cat packages.txt`
|
||||
apt-get -y install gcc g++ cmake libnuma-dev
|
||||
|
||||
- name: Checkout vllm-project/vllm repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: vllm-project/vllm
|
||||
ref: ${{ inputs.vllm }}
|
||||
path: ./vllm-empty
|
||||
|
||||
- name: Install vllm-project/vllm from source
|
||||
working-directory: ./vllm-empty
|
||||
run: |
|
||||
VLLM_TARGET_DEVICE=empty pip install -e .
|
||||
|
||||
- name: Install vllm-project/vllm-ascend
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||
run: |
|
||||
pip install -r requirements-dev.txt
|
||||
pip install -v -e .
|
||||
|
||||
- name: Install custom-ops (for DeepSeek-V3.2-Exp)
|
||||
if: ${{ inputs.name == 'deepseek3_2-exp-w8a8' }}
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run
|
||||
chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run
|
||||
./CANN-custom_ops-sfa-linux.aarch64.run --quiet
|
||||
export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH}
|
||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH}
|
||||
wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl
|
||||
pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl
|
||||
. /usr/local/Ascend/ascend-toolkit/set_env.sh
|
||||
|
||||
- name: Checkout aisbench repo and Install aisbench
|
||||
run: |
|
||||
git clone https://gitee.com/aisbench/benchmark.git
|
||||
cd benchmark
|
||||
git checkout v3.0-20250930-master
|
||||
pip3 install -e ./
|
||||
pip3 install -r requirements/api.txt
|
||||
pip3 install -r requirements/extra.txt
|
||||
echo ""
|
||||
echo "============================"
|
||||
echo "vLLM-Ascend Git information"
|
||||
echo "============================"
|
||||
cd vllm-ascend
|
||||
if [ -d .git ]; then
|
||||
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
|
||||
echo "Commit hash: $(git rev-parse HEAD)"
|
||||
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
|
||||
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
|
||||
echo "Message: $(git log -1 --pretty=format:'%s')"
|
||||
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
|
||||
echo "Remote: $(git remote -v | head -n1)"
|
||||
echo ""
|
||||
else
|
||||
echo "No .git directory found in vllm-ascend"
|
||||
fi
|
||||
cd ..
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
VLLM_USE_MODELSCOPE: True
|
||||
VLLM_CI_RUNNER: ${{ inputs.runner }}
|
||||
BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark
|
||||
working-directory: /vllm-workspace/vllm-ascend
|
||||
run: |
|
||||
pytest -sv ${{ inputs.tests }}
|
||||
|
||||
57
.github/workflows/_kill_lws_resources.yaml
vendored
57
.github/workflows/_kill_lws_resources.yaml
vendored
@@ -1,57 +0,0 @@
|
||||
name: 'resource clear'

# Reusable workflow that deletes the `vllm` LeaderWorkerSet from the cluster
# addressed by the kubeconfig passed in as a base64-encoded secret.
on:
  workflow_call:
    inputs:
      runner:
        required: false
        type: string
        default: linux-aarch64-a3-0
    secrets:
      KUBECONFIG_B64:
        required: true


# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

jobs:
  resource_clear:
    # This is a runner with no NPU for k8s controller
    runs-on: ${{ inputs.runner }}
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
      env:
        KUBECONFIG: /tmp/kubeconfig
        KUBECTL: /root/.cache/.kube/kubectl
        NAMESPACE: vllm-project
        # NOTE(review): LEADER_POD and RESULT_FILE are not read by any step in this
        # workflow -- presumably carried over from a sibling workflow; confirm.
        LEADER_POD: vllm-0
        RESULT_FILE: /root/.cache/tests/ret/test_result.txt
    steps:
      - name: Install kubectl
        run: |
          # Install kubectl
          arch=$(uname -m)

          # On ARM hosts use the "_arm"-suffixed binary that sits next to $KUBECTL.
          if echo "$arch" | grep -qiE "arm|aarch64"; then
            echo "Detected ARM architecture: $arch"
            KUBECTL="${KUBECTL}_arm"
          fi
          install -o root -g root -m 0755 "$KUBECTL" /usr/local/bin/kubectl

          # Verify kubectl installation
          kubectl version --client=true

      - name: Decode kubeconfig from secrets
        env:
          # Pass the secret through the environment instead of templating it into
          # the script text, so its content can never be parsed as shell syntax.
          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
        run: |
          # Decode and save kubeconfig; restrict permissions because it holds credentials
          umask 077
          echo "$KUBECONFIG_B64" | base64 -d > "$KUBECONFIG"

      - name: Clear LWS resources
        # Runs even when an earlier step failed, so cleanup is always attempted.
        if: always()
        run: |
          kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
|
||||
74
.github/workflows/_nightly_image_build.yaml
vendored
Normal file
74
.github/workflows/_nightly_image_build.yaml
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
name: 'image / nightly / Ubuntu / test'

# Triggers: a fixed daily schedule, or a call from another workflow that names
# the target to build and receives the resulting image tag as an output.
on:
  schedule:
    - cron: '0 0,4,8,12,14 * * *'
  workflow_call:
    inputs:
      target:
        required: true
        type: string
        description: 'Target architecture, e.g., a2, a3'
    outputs:
      image-tag:
        description: 'The built image tag'
        value: ${{ jobs.build-and-sync.outputs.image-tag }}
    secrets:
      HW_USERNAME:
        required: true
      HW_TOKEN:
        required: true

# This workflow builds and pushes the Docker images used by nightly-ci.
# Each image is built from .github/Dockerfile.nightly.<target>, which is based on
# a quay.io/ascend/vllm-ascend image and adds customizations for nightly testing.
# The result is pushed to Huawei Cloud SWR.
jobs:
  build-and-sync:
    runs-on: ubuntu-22.04-arm

    strategy:
      matrix:
        # Scheduled runs build both targets; a workflow_call builds only the
        # requested target (falling back to a3 when none is given).
        target: ${{ fromJson(github.event_name == 'schedule' && '["a2","a3"]' || format('["{0}"]', inputs.target || 'a3')) }}

    # NOTE(review): on scheduled runs two matrix legs write this same output name;
    # which value a consumer would see is not evident from this file -- confirm
    # if anything ever reads the output of a scheduled run.
    outputs:
      image-tag: ${{ steps.build-image.outputs.image-tag }}

    steps:
      - uses: actions/checkout@v4

      - name: Show build target
        env:
          TARGET: ${{ matrix.target }}
        run: |
          echo "Building target: $TARGET"

      - name: Login to Huawei Cloud SWR
        id: login-swr
        # Skipped when the credentials are not available.
        if: ${{ env.HW_USERNAME != '' && env.HW_TOKEN != '' }}
        env:
          HW_USERNAME: ${{ secrets.HW_USERNAME }}
          HW_TOKEN: ${{ secrets.HW_TOKEN }}
        run: |
          # Read the credentials from the step environment instead of templating
          # them into the script text, so special characters in the token cannot
          # break or alter the command line.
          echo "$HW_TOKEN" | docker login -u "$HW_USERNAME" --password-stdin swr.cn-southwest-2.myhuaweicloud.com

      - name: Build image
        id: build-image
        env:
          # Passed via env rather than inlined so the caller-supplied value is
          # treated as data, not as part of the script.
          TARGET: ${{ matrix.target }}
        run: |
          IMAGE_TAG="swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-${TARGET}"

          echo "Building image: $IMAGE_TAG"
          # NOTE(review): the Dockerfile.nightly.* files added in this commit
          # declare no CANN_VERSION / UBUNTU_VERSION / PYTHON_VERSION ARGs, so
          # these build args appear to be unused -- confirm before removing.
          docker build \
            --network host \
            --platform linux/arm64 \
            -f ".github/Dockerfile.nightly.${TARGET}" \
            --build-arg CANN_VERSION="8.3.rc1" \
            --build-arg UBUNTU_VERSION="22.04" \
            --build-arg PYTHON_VERSION="3.11" \
            -t "$IMAGE_TAG" .

          echo "image-tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"

      # To avoid pushing images from forks, only push when the repository owner is 'vllm-project'
      - name: Push image to SWR
        if: ${{ github.repository_owner == 'vllm-project' && steps.login-swr.conclusion == 'success' }}
        env:
          IMAGE_TAG: ${{ steps.build-image.outputs.image-tag }}
        run: |
          docker push "$IMAGE_TAG"
|
||||
@@ -42,9 +42,18 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
image_build:
|
||||
name: nightly image build
|
||||
uses: ./.github/workflows/_nightly_image_build.yaml
|
||||
with:
|
||||
target: a2
|
||||
secrets:
|
||||
HW_USERNAME: ${{ secrets.HW_USERNAME }}
|
||||
HW_TOKEN: ${{ secrets.HW_TOKEN }}
|
||||
single-node-tests:
|
||||
name: single-node
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
needs: image_build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -63,10 +72,11 @@ jobs:
|
||||
vllm: v0.11.0
|
||||
runner: ${{ matrix.test_config.os }}
|
||||
tests: ${{ matrix.test_config.tests }}
|
||||
image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2')) }}
|
||||
|
||||
multi-node-tests:
|
||||
name: multi-node
|
||||
needs: single-node-tests
|
||||
needs: [single-node-tests, image_build]
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -83,7 +93,7 @@ jobs:
|
||||
with:
|
||||
soc_version: a2
|
||||
runner: linux-aarch64-a2-0
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
|
||||
image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2')) }}
|
||||
replicas: 1
|
||||
size: ${{ matrix.test_config.size }}
|
||||
config_file_path: ${{ matrix.test_config.config_file_path }}
|
||||
|
||||
@@ -41,9 +41,18 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
image_build:
|
||||
name: nightly image build
|
||||
uses: ./.github/workflows/_nightly_image_build.yaml
|
||||
with:
|
||||
target: a3
|
||||
secrets:
|
||||
HW_USERNAME: ${{ secrets.HW_USERNAME }}
|
||||
HW_TOKEN: ${{ secrets.HW_TOKEN }}
|
||||
single-node-tests:
|
||||
name: single-node
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
needs: image_build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -94,13 +103,13 @@ jobs:
|
||||
with:
|
||||
vllm: v0.11.0
|
||||
runner: ${{ matrix.test_config.os }}
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11
|
||||
image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3')) }}
|
||||
tests: ${{ matrix.test_config.tests }}
|
||||
name: ${{ matrix.test_config.name }}
|
||||
|
||||
multi-node-tests:
|
||||
name: multi-node
|
||||
needs: single-node-tests
|
||||
needs: [single-node-tests, image_build]
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -129,7 +138,7 @@ jobs:
|
||||
with:
|
||||
soc_version: a3
|
||||
runner: linux-aarch64-a3-0
|
||||
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
|
||||
image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3')) }}
|
||||
replicas: 1
|
||||
size: ${{ matrix.test_config.size }}
|
||||
config_file_path: ${{ matrix.test_config.config_file_path }}
|
||||
|
||||
Reference in New Issue
Block a user