From 7294f89e431ca525e30614ef1c791b53b80b5e16 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 13 Nov 2025 20:10:12 +0800 Subject: [PATCH] [CI] Add daily images build for nightly ci (#3989) ### What this PR does / why we need it? Given the current excessively long build time of our nightly-ci, I recommend installing necessary, confirmed versions of packages in the Docker image to reduce the time required for integration testing, including Mooncake and vLLM with fixed tags. This is expected to reduce the nightly-ci duration by 2 hours. - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 --------- Signed-off-by: wangli --- .github/Dockerfile.nightly.a2 | 44 ++++++ .github/Dockerfile.nightly.a3 | 44 ++++++ .../workflows/_e2e_nightly_single_node.yaml | 95 ++++++------ .github/workflows/_kill_lws_resources.yaml | 57 ------- .github/workflows/_nightly_image_build.yaml | 74 ++++++++++ .../vllm_ascend_test_nightly_a2.yaml | 16 +- .../vllm_ascend_test_nightly_a3.yaml | 17 ++- .../multi_node/scripts/build_mooncake.sh | 112 -------------- .../multi_node/scripts/lws.yaml.jinja2 | 12 +- tests/e2e/nightly/multi_node/scripts/run.sh | 139 ++++++------------ tools/aisbench.py | 9 +- 11 files changed, 285 insertions(+), 334 deletions(-) create mode 100644 .github/Dockerfile.nightly.a2 create mode 100644 .github/Dockerfile.nightly.a3 delete mode 100644 .github/workflows/_kill_lws_resources.yaml create mode 100644 .github/workflows/_nightly_image_build.yaml delete mode 100644 tests/e2e/nightly/multi_node/scripts/build_mooncake.sh diff --git a/.github/Dockerfile.nightly.a2 b/.github/Dockerfile.nightly.a2 new file mode 100644 index 00000000..e7e395f6 --- /dev/null +++ b/.github/Dockerfile.nightly.a2 @@ -0,0 +1,44 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +FROM quay.io/ascend/vllm-ascend:main + +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" +ARG AIS_BENCH_TAG="v3.0-20250930-master" +ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git" + +# Define environments +ENV DEBIAN_FRONTEND=noninteractive +ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} + +WORKDIR /workspace + +RUN pip config set global.index-url ${PIP_INDEX_URL} + +# Install requirements-dev.txt for tests +RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ + cd /vllm-workspace/vllm-ascend && \ + python3 -m pip install -r requirements-dev.txt && \ + python3 -m pip cache purge + +# Install benchmark tools +RUN git clone -b ${AIS_BENCH_TAG} --depth 1 ${AIS_BENCH_URL} /vllm-workspace/vllm-ascend/benchmark && \ + cd /vllm-workspace/vllm-ascend/benchmark && \ + pip install -e . -r requirements/api.txt -r requirements/extra.txt && \ + python3 -m pip cache purge + +CMD ["/bin/bash"] diff --git a/.github/Dockerfile.nightly.a3 b/.github/Dockerfile.nightly.a3 new file mode 100644 index 00000000..0012c544 --- /dev/null +++ b/.github/Dockerfile.nightly.a3 @@ -0,0 +1,44 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +FROM quay.io/ascend/vllm-ascend:main-a3 + +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" +ARG AIS_BENCH_TAG="v3.0-20250930-master" +ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git" + +# Define environments +ENV DEBIAN_FRONTEND=noninteractive +ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} + +WORKDIR /workspace + +RUN pip config set global.index-url ${PIP_INDEX_URL} + +# Install requirements-dev.txt for tests +RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ + cd /vllm-workspace/vllm-ascend && \ + python3 -m pip install -r requirements-dev.txt && \ + python3 -m pip cache purge + +# Install benchmark tools +RUN git clone -b ${AIS_BENCH_TAG} --depth 1 ${AIS_BENCH_URL} /vllm-workspace/vllm-ascend/benchmark && \ + cd /vllm-workspace/vllm-ascend/benchmark && \ + pip install -e . 
-r requirements/api.txt -r requirements/extra.txt && \ + python3 -m pip cache purge + +CMD ["/bin/bash"] diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml index 4180addd..b60e4613 100644 --- a/.github/workflows/_e2e_nightly_single_node.yaml +++ b/.github/workflows/_e2e_nightly_single_node.yaml @@ -62,67 +62,56 @@ jobs: npu-smi info cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - name: Config mirrors + - name: Show vLLM and vLLM-Ascend version + working-directory: /vllm-workspace run: | - sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list - pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - apt-get update -y - apt install git -y - git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/ + echo "Installed vLLM-related Python packages:" + pip list | grep vllm || echo "No vllm packages found." - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 + echo "" + echo "============================" + echo "vLLM Git information" + echo "============================" + cd vllm + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm" + fi + cd .. 
- - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ inputs.vllm }} - path: ./vllm-empty - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . - - - name: Install custom-ops (for DeepSeek-V3.2-Exp) - if: ${{ inputs.name == 'deepseek3_2-exp-w8a8' }} - shell: bash -l {0} - run: | - wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run - chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run - ./CANN-custom_ops-sfa-linux.aarch64.run --quiet - export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH} - export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH} - wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl - pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl - . 
/usr/local/Ascend/ascend-toolkit/set_env.sh - - - name: Checkout aisbench repo and Install aisbench - run: | - git clone https://gitee.com/aisbench/benchmark.git - cd benchmark - git checkout v3.0-20250930-master - pip3 install -e ./ - pip3 install -r requirements/api.txt - pip3 install -r requirements/extra.txt + echo "" + echo "============================" + echo "vLLM-Ascend Git information" + echo "============================" + cd vllm-ascend + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm-ascend" + fi + cd .. - name: Run vllm-project/vllm-ascend test env: VLLM_WORKER_MULTIPROC_METHOD: spawn VLLM_USE_MODELSCOPE: True VLLM_CI_RUNNER: ${{ inputs.runner }} + BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark + working-directory: /vllm-workspace/vllm-ascend run: | pytest -sv ${{ inputs.tests }} diff --git a/.github/workflows/_kill_lws_resources.yaml b/.github/workflows/_kill_lws_resources.yaml deleted file mode 100644 index 4c70ff2d..00000000 --- a/.github/workflows/_kill_lws_resources.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: 'resource clear' - -on: - workflow_call: - inputs: - runner: - required: false - type: string - default: linux-aarch64-a3-0 - secrets: - KUBECONFIG_B64: - required: true - - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. 
-defaults: - run: - shell: bash -el {0} - -jobs: - resource_clear: - # This is a runner with no NPU for k8s controller - runs-on: ${{ inputs.runner }} - container: - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 - env: - KUBECONFIG: /tmp/kubeconfig - KUBECTL: /root/.cache/.kube/kubectl - NAMESPACE: vllm-project - LEADER_POD: vllm-0 - RESULT_FILE: /root/.cache/tests/ret/test_result.txt - steps: - - name: Install kubectl - run: | - # Install kubectl - arch=$(uname -m) - - if echo "$arch" | grep -qiE "arm|aarch64"; then - echo "Detected ARM architecture: $arch" - KUBECTL="$KUBECTL"_arm - fi - install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl - - # Verify kubectl installation - kubectl version --client=true - - - name: Decode kubeconfig from secrets - run: | - # Decode and save kubeconfig - echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG - - - name: Clear LWS resources - if: always() - run: | - kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found diff --git a/.github/workflows/_nightly_image_build.yaml b/.github/workflows/_nightly_image_build.yaml new file mode 100644 index 00000000..25570cd0 --- /dev/null +++ b/.github/workflows/_nightly_image_build.yaml @@ -0,0 +1,74 @@ +name: 'image / nightly / Ubuntu / test' + +on: + schedule: + - cron: '0 0,4,8,12,14 * * *' + workflow_call: + inputs: + target: + required: true + type: string + description: 'Target architecture, e.g., a2, a3' + outputs: + image-tag: + description: 'The built image tag' + value: ${{ jobs.build-and-sync.outputs.image-tag }} + secrets: + HW_USERNAME: + required: true + HW_TOKEN: + required: true + +# This workflow builds and pushes Docker images for nightly-ci. +# The image is built based on quay.io/ascend/vllm-ascend:main +# and has some customizations for nightly testing; it is pushed to Huawei Cloud SWR. +jobs: + build-and-sync: + runs-on: ubuntu-22.04-arm + + strategy: + matrix: + target: ${{ fromJson(github.event_name == 
'schedule' && '["a2","a3"]' || format('["{0}"]', inputs.target || 'a3')) }} + + outputs: + image-tag: ${{ steps.build-image.outputs.image-tag }} + + steps: + - uses: actions/checkout@v4 + + - name: Show build target + run: | + echo "Building target: ${{ matrix.target }}" + + - name: Login to Huawei Cloud SWR + id: login-swr + if: ${{ env.HW_USERNAME != '' && env.HW_TOKEN != '' }} + run: | + echo "${{ env.HW_TOKEN }}" | docker login -u "${{ env.HW_USERNAME }}" --password-stdin swr.cn-southwest-2.myhuaweicloud.com + env: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + + - name: Build image + id: build-image + run: | + TARGET="${{ matrix.target }}" + IMAGE_TAG="swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-${TARGET}" + + echo "Building image: $IMAGE_TAG" + docker build \ + --network host \ + --platform linux/arm64 \ + -f .github/Dockerfile.nightly.${TARGET} \ + --build-arg CANN_VERSION="8.3.rc1" \ + --build-arg UBUNTU_VERSION="22.04" \ + --build-arg PYTHON_VERSION="3.11" \ + -t "$IMAGE_TAG" . 
+ + echo "image-tag=$IMAGE_TAG" >> $GITHUB_OUTPUT + + # To avoid pushing images from forks, only push when the repository owner is 'vllm-project' + - name: Push image to SWR + if: ${{ github.repository_owner == 'vllm-project' && steps.login-swr.conclusion == 'success' }} + run: | + docker push ${{ steps.build-image.outputs.image-tag }} diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index d04c533e..8c1c0997 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -42,9 +42,18 @@ concurrency: cancel-in-progress: true jobs: + image_build: + name: nightly image build + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a2 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} single-node-tests: name: single-node - if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + needs: image_build strategy: fail-fast: false matrix: @@ -63,10 +72,11 @@ jobs: vllm: v0.11.0 runner: ${{ matrix.test_config.os }} tests: ${{ matrix.test_config.tests }} + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2')) }} multi-node-tests: name: multi-node - needs: single-node-tests + needs: [single-node-tests, image_build] if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') strategy: fail-fast: false @@ -83,7 +93,7 @@ jobs: with: soc_version: a2 runner: linux-aarch64-a2-0 - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2')) }} replicas: 
1 size: ${{ matrix.test_config.size }} config_file_path: ${{ matrix.test_config.config_file_path }} diff --git a/.github/workflows/vllm_ascend_test_nightly_a3.yaml b/.github/workflows/vllm_ascend_test_nightly_a3.yaml index a77e1d11..ad0af550 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a3.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a3.yaml @@ -41,9 +41,18 @@ concurrency: cancel-in-progress: true jobs: + image_build: + name: nightly image build + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a3 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} single-node-tests: name: single-node - if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + needs: image_build strategy: fail-fast: false matrix: @@ -94,13 +103,13 @@ jobs: with: vllm: v0.11.0 runner: ${{ matrix.test_config.os }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3')) }} tests: ${{ matrix.test_config.tests }} name: ${{ matrix.test_config.name }} multi-node-tests: name: multi-node - needs: single-node-tests + needs: [single-node-tests, image_build] if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') strategy: fail-fast: false @@ -129,7 +138,7 @@ jobs: with: soc_version: a3 runner: linux-aarch64-a3-0 - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3')) }} replicas: 1 size: ${{ matrix.test_config.size }} config_file_path: ${{ matrix.test_config.config_file_path }} diff --git 
a/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh b/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh deleted file mode 100644 index 7627cf0c..00000000 --- a/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -set -e -set -o pipefail - -GREEN="\033[0;32m" -BLUE="\033[0;34m" -YELLOW="\033[0;33m" -RED="\033[0;31m" -NC="\033[0m" # No Color - -branch=${1:-v0.3.7.post2} - -repo_url="https://github.com/kvcache-ai/Mooncake" -repo_name="Mooncake" -state_file=".build_state" - -echo "[INFO] Branch: $branch" -echo "-------------------------------------------" - - -mark_done() { echo "$1" >> "$state_file"; } -is_done() { grep -Fxq "$1" "$state_file" 2>/dev/null; } - -if ! is_done "clone"; then - echo "[STEP] Clone repository..." - if [ -d "$repo_name" ]; then - echo "[WARN] Directory $repo_name already exists, skipping clone." - else - git clone --branch "$branch" --depth 1 "$repo_url" "$repo_name" - fi - mark_done "clone" -else - echo "[SKIP] Clone step already done." -fi - -init_ascend_env() { - cann_in_sys_path=/usr/local/Ascend/ascend-toolkit; \ - cann_in_user_path=$HOME/Ascend/ascend-toolkit; \ - uname_m=$(uname -m) && \ - if [ -f "${cann_in_sys_path}/set_env.sh" ]; then \ - source ${cann_in_sys_path}/set_env.sh; \ - export LD_LIBRARY_PATH=${cann_in_sys_path}/latest/lib64:${cann_in_sys_path}/latest/${uname_m}-linux/devlib:${LD_LIBRARY_PATH} ; \ - elif [ -f "${cann_in_user_path}/set_env.sh" ]; then \ - source "$HOME/Ascend/ascend-toolkit/set_env.sh"; \ - export LD_LIBRARY_PATH=${cann_in_user_path}/latest/lib64:${cann_in_user_path}/latest/${uname_m}-linux/devlib:${LD_LIBRARY_PATH}; \ - else \ - echo "No Ascend Toolkit found"; \ - exit 1; \ - fi -} - -init_ascend_env - -if ! is_done "deps"; then - cd "$repo_name" - echo "[STEP]Installing dependencies..." - sed -i 's|https://go.dev/dl/|https://golang.google.cn/dl/|g' dependencies.sh - bash dependencies.sh -y - cd .. 
- mark_done "deps" -else - echo "[SKIP] Dependencies already installed." -fi - - -if ! is_done "mpi"; then - echo "[STEP] Install MPI..." - apt purge -y mpich libmpich-dev openmpi-bin libopenmpi-dev || true - apt install -y mpich libmpich-dev - export CPATH=/usr/lib/aarch64-linux-gnu/mpich/include/:${CPATH:-} - export CPATH=/usr/lib/aarch64-linux-gnu/openmpi/lib:${CPATH:-} - mark_done "mpi" -else - echo "[SKIP] MPI installation already done." -fi - - -if ! is_done "build"; then - echo "[STEP] Compile and install..." - cd "$repo_name" - - if [ -d "build" ]; then - echo "[INFO] Removing existing build directory..." - rm -rf build - fi - - mkdir build && cd build - cmake .. -USE_ASCEND_DIRECT=ON || { echo "[ERROR] cmake failed."; exit 1; } - make -j || { echo "[ERROR] make failed."; exit 1; } - make install || { echo "[ERROR] make install failed."; exit 1; } - mark_done "build" -else - echo "[SKIP] Build already done." -fi - - -if ! grep -q "export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH" ~/.bashrc; then - echo -e "${YELLOW}Adding LD_LIBRARY_PATH to your PATH in ~/.bashrc${NC}" - echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH' >> ~/.bashrc - echo -e "${YELLOW}Please run 'source ~/.bashrc' or start a new terminal${NC}" -fi -export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH - - -echo "==========================================" -echo -e "${GREEN}[SUCCESS] Mooncake build completed!" -echo "You can rerun this script anytime — it will resume from the last step." 
-echo "==========================================" - -echo "Example startup command:" -echo "mooncake_master --eviction_high_watermark_ratio 0.8 --eviction_ratio 0.05 --port 50088" diff --git a/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 b/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 index 712fe3f0..a18d91a4 100644 --- a/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +++ b/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 @@ -15,15 +15,13 @@ spec: spec: containers: - name: vllm-leader - image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11") }} + image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }} env: - name: CONFIG_YAML_PATH value: {{ config_file_path | default("DeepSeek-V3.yaml") }} - name: WORKSPACE - value: "/root/workspace" + value: "/vllm-workspace" # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. - - name: VLLM_VERSION - value: "v0.11.0" - name: VLLM_ASCEND_VERSION value: {{ vllm_ascend_ref | default("main") }} - name: VLLM_ASCEND_REMOTE_URL @@ -75,15 +73,13 @@ spec: spec: containers: - name: vllm-worker - image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11") }} + image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }} env: - name: CONFIG_YAML_PATH value: {{ config_file_path | default("DeepSeek-V3.yaml") }} - name: WORKSPACE - value: "/root/workspace" + value: "/vllm-workspace" # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. 
- - name: VLLM_VERSION - value: "v0.11.0" - name: VLLM_ASCEND_VERSION value: {{ vllm_ascend_ref | default("main") }} - name: VLLM_ASCEND_REMOTE_URL diff --git a/tests/e2e/nightly/multi_node/scripts/run.sh b/tests/e2e/nightly/multi_node/scripts/run.sh index b55ce8d9..080e0ea8 100644 --- a/tests/e2e/nightly/multi_node/scripts/run.sh +++ b/tests/e2e/nightly/multi_node/scripts/run.sh @@ -11,8 +11,8 @@ NC="\033[0m" # No Color # Configuration LOG_DIR="/root/.cache/tests/logs" OVERWRITE_LOGS=true -SRC_DIR="$WORKSPACE/source_code" export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH +export BENCHMARK_HOME=${WORKSPACE}/vllm-ascend/benchmark # Function to print section headers print_section() { @@ -35,18 +35,50 @@ print_error() { exit 1 } -# Function to check command success -check_success() { - if [ $? -ne 0 ]; then - print_error "$1" +show_vllm_info() { + cd "$WORKSPACE" + echo "Installed vLLM-related Python packages:" + pip list | grep vllm || echo "No vllm packages found." + + echo "" + echo "============================" + echo "vLLM Git information" + echo "============================" + cd vllm + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm" fi + cd .. 
+ + echo "" + echo "============================" + echo "vLLM-Ascend Git information" + echo "============================" + cd vllm-ascend + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm-ascend" + fi + cd .. } -if [ $(id -u) -ne 0 ]; then - print_error "Require root permission, try sudo ./dependencies.sh" -fi - - check_npu_info() { echo "====> Check NPU info" npu-smi info @@ -60,79 +92,6 @@ check_and_config() { export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi } -checkout_src() { - echo "====> Checkout source code" - mkdir -p "$SRC_DIR" - - # vllm-ascend - if [ ! -d "$SRC_DIR/vllm-ascend" ]; then - git clone --depth 1 -b $VLLM_ASCEND_VERSION $VLLM_ASCEND_REMOTE_URL "$SRC_DIR/vllm-ascend" - fi - - # vllm - if [ ! -d "$SRC_DIR/vllm" ]; then - git clone -b $VLLM_VERSION https://github.com/vllm-project/vllm.git "$SRC_DIR/vllm" - fi -} - -install_sys_dependencies() { - echo "====> Install system dependencies" - apt-get update -y - - DEP_LIST=() - while IFS= read -r line; do - [[ -n "$line" && ! 
"$line" =~ ^# ]] && DEP_LIST+=("$line") - done < "$SRC_DIR/vllm-ascend/packages.txt" - - apt-get install -y "${DEP_LIST[@]}" gcc g++ cmake libnuma-dev iproute2 -} - -install_vllm() { - echo "====> Install vllm and vllm-ascend" - VLLM_TARGET_DEVICE=empty pip install -e "$SRC_DIR/vllm" - pip install -e "$SRC_DIR/vllm-ascend" - pip install modelscope - # Install for pytest - pip install -r "$SRC_DIR/vllm-ascend/requirements-dev.txt" -} - -install_ais_bench() { - local AIS_BENCH="$SRC_DIR/vllm-ascend/benchmark" - git clone https://gitee.com/aisbench/benchmark.git $AIS_BENCH - cd $AIS_BENCH - git checkout v3.0-20250930-master - pip3 install -e ./ - pip3 install -r requirements/api.txt - pip3 install -r requirements/extra.txt - cd - -} - -install_extra_components() { - echo "====> Installing extra components for DeepSeek-v3.2-exp-bf16" - - if ! wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run; then - echo "Failed to download CANN-custom_ops-sfa-linux.aarch64.run" - return 1 - fi - chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run - ./CANN-custom_ops-sfa-linux.aarch64.run --quiet - - if ! 
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl; then - echo "Failed to download custom_ops wheel" - return 1 - fi - pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl - -export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH} -export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH} -source /usr/local/Ascend/ascend-toolkit/set_env.sh -EOF - - rm -f CANN-custom_ops-sfa-linux.aarch64.run \ - custom_ops-1.0-cp311-cp311-linux_aarch64.whl - echo "====> Extra components installation completed" -} - kill_npu_processes() { pgrep python3 | xargs -r kill -9 pgrep VLLM | xargs -r kill -9 @@ -163,17 +122,9 @@ run_tests_with_log() { main() { check_npu_info check_and_config - checkout_src - install_sys_dependencies - install_vllm - if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then - install_extra_components - fi - install_ais_bench - cd "$WORKSPACE/source_code" - . 
$SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh - cd "$WORKSPACE/source_code/vllm-ascend" + show_vllm_info + cd "$WORKSPACE/vllm-ascend" run_tests_with_log } -main "$@" \ No newline at end of file +main "$@" diff --git a/tools/aisbench.py b/tools/aisbench.py index 14f1468e..9f37f126 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -28,9 +28,12 @@ import huggingface_hub import pandas as pd from modelscope import snapshot_download # type: ignore -DATASET_CONF_DIR = "benchmark/ais_bench/benchmark/configs/datasets" -REQUEST_CONF_DIR = "benchmark/ais_bench/benchmark/configs/models/vllm_api" -DATASET_DIR = "benchmark/ais_bench/datasets" +BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath(".")) +DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", + "configs", "datasets") +REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", + "configs", "models", "vllm_api") +DATASET_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "datasets") class AisbenchRunner: