diff --git a/.github/Dockerfile.nightly.a2 b/.github/Dockerfile.nightly.a2 new file mode 100644 index 00000000..e7e395f6 --- /dev/null +++ b/.github/Dockerfile.nightly.a2 @@ -0,0 +1,44 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +FROM quay.io/ascend/vllm-ascend:main + +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" +ARG AIS_BENCH_TAG="v3.0-20250930-master" +ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git" + +# Define environments +ENV DEBIAN_FRONTEND=noninteractive +ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} + +WORKDIR /workspace + +RUN pip config set global.index-url ${PIP_INDEX_URL} + +# Install requirements-dev.txt for tests +RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ + cd /vllm-workspace/vllm-ascend && \ + python3 -m pip install -r requirements-dev.txt && \ + python3 -m pip cache purge + +# Install benchmark tools +RUN git clone -b ${AIS_BENCH_TAG} --depth 1 ${AIS_BENCH_URL} /vllm-workspace/vllm-ascend/benchmark && \ + cd /vllm-workspace/vllm-ascend/benchmark && \ + pip install -e . 
-r requirements/api.txt -r requirements/extra.txt && \ + python3 -m pip cache purge + +CMD ["/bin/bash"] diff --git a/.github/Dockerfile.nightly.a3 b/.github/Dockerfile.nightly.a3 new file mode 100644 index 00000000..0012c544 --- /dev/null +++ b/.github/Dockerfile.nightly.a3 @@ -0,0 +1,44 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +FROM quay.io/ascend/vllm-ascend:main-a3 + +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" +ARG AIS_BENCH_TAG="v3.0-20250930-master" +ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git" + +# Define environments +ENV DEBIAN_FRONTEND=noninteractive +ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} + +WORKDIR /workspace + +RUN pip config set global.index-url ${PIP_INDEX_URL} + +# Install requirements-dev.txt for tests +RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ + cd /vllm-workspace/vllm-ascend && \ + python3 -m pip install -r requirements-dev.txt && \ + python3 -m pip cache purge + +# Install benchmark tools +RUN git clone -b ${AIS_BENCH_TAG} --depth 1 ${AIS_BENCH_URL} /vllm-workspace/vllm-ascend/benchmark && \ + cd /vllm-workspace/vllm-ascend/benchmark && \ + pip install -e . 
-r requirements/api.txt -r requirements/extra.txt && \ + python3 -m pip cache purge + +CMD ["/bin/bash"] diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml index 4180addd..b60e4613 100644 --- a/.github/workflows/_e2e_nightly_single_node.yaml +++ b/.github/workflows/_e2e_nightly_single_node.yaml @@ -62,67 +62,56 @@ jobs: npu-smi info cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - name: Config mirrors + - name: Show vLLM and vLLM-Ascend version + working-directory: /vllm-workspace run: | - sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list - pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple - apt-get update -y - apt install git -y - git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/ + echo "Installed vLLM-related Python packages:" + pip list | grep vllm || echo "No vllm packages found." - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v4 + echo "" + echo "============================" + echo "vLLM Git information" + echo "============================" + cd vllm + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm" + fi + cd .. 
- - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - ref: ${{ inputs.vllm }} - path: ./vllm-empty - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - env: - PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi - run: | - pip install -r requirements-dev.txt - pip install -v -e . - - - name: Install custom-ops (for DeepSeek-V3.2-Exp) - if: ${{ inputs.name == 'deepseek3_2-exp-w8a8' }} - shell: bash -l {0} - run: | - wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run - chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run - ./CANN-custom_ops-sfa-linux.aarch64.run --quiet - export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH} - export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH} - wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl - pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl - . 
/usr/local/Ascend/ascend-toolkit/set_env.sh - - - name: Checkout aisbench repo and Install aisbench - run: | - git clone https://gitee.com/aisbench/benchmark.git - cd benchmark - git checkout v3.0-20250930-master - pip3 install -e ./ - pip3 install -r requirements/api.txt - pip3 install -r requirements/extra.txt + echo "" + echo "============================" + echo "vLLM-Ascend Git information" + echo "============================" + cd vllm-ascend + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm-ascend" + fi + cd .. - name: Run vllm-project/vllm-ascend test env: VLLM_WORKER_MULTIPROC_METHOD: spawn VLLM_USE_MODELSCOPE: True VLLM_CI_RUNNER: ${{ inputs.runner }} + BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark + working-directory: /vllm-workspace/vllm-ascend run: | pytest -sv ${{ inputs.tests }} diff --git a/.github/workflows/_kill_lws_resources.yaml b/.github/workflows/_kill_lws_resources.yaml deleted file mode 100644 index 4c70ff2d..00000000 --- a/.github/workflows/_kill_lws_resources.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: 'resource clear' - -on: - workflow_call: - inputs: - runner: - required: false - type: string - default: linux-aarch64-a3-0 - secrets: - KUBECONFIG_B64: - required: true - - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. 
-defaults: - run: - shell: bash -el {0} - -jobs: - resource_clear: - # This is a runner with no NPU for k8s controller - runs-on: ${{ inputs.runner }} - container: - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 - env: - KUBECONFIG: /tmp/kubeconfig - KUBECTL: /root/.cache/.kube/kubectl - NAMESPACE: vllm-project - LEADER_POD: vllm-0 - RESULT_FILE: /root/.cache/tests/ret/test_result.txt - steps: - - name: Install kubectl - run: | - # Install kubectl - arch=$(uname -m) - - if echo "$arch" | grep -qiE "arm|aarch64"; then - echo "Detected ARM architecture: $arch" - KUBECTL="$KUBECTL"_arm - fi - install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl - - # Verify kubectl installation - kubectl version --client=true - - - name: Decode kubeconfig from secrets - run: | - # Decode and save kubeconfig - echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG - - - name: Clear LWS resources - if: always() - run: | - kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found diff --git a/.github/workflows/_nightly_image_build.yaml b/.github/workflows/_nightly_image_build.yaml new file mode 100644 index 00000000..25570cd0 --- /dev/null +++ b/.github/workflows/_nightly_image_build.yaml @@ -0,0 +1,74 @@ +name: 'image / nightly / Ubuntu / test' + +on: + schedule: + - cron: '0 0,4,8,12,14 * * *' + workflow_call: + inputs: + target: + required: true + type: string + description: 'Target architecture, e.g., a2, a3' + outputs: + image-tag: + description: 'The built image tag' + value: ${{ jobs.build-and-sync.outputs.image-tag }} + secrets: + HW_USERNAME: + required: true + HW_TOKEN: + required: true + +# This workflow builds and pushes Docker images for nightly-ci +# It will be built base on the quay.io/ascend/vllm-ascend:main +# And have some customizations for nightly testing, pushing to Huawei Cloud SWR +jobs: + build-and-sync: + runs-on: ubuntu-22.04-arm + + strategy: + matrix: + target: ${{ fromJson(github.event_name == 
'schedule' && '["a2","a3"]' || format('["{0}"]', inputs.target || 'a3')) }} + + outputs: + image-tag: ${{ steps.build-image.outputs.image-tag }} + + steps: + - uses: actions/checkout@v4 + + - name: Show build target + run: | + echo "Building target: ${{ matrix.target }}" + + - name: Login to Huawei Cloud SWR + id: login-swr + if: ${{ env.HW_USERNAME != '' && env.HW_TOKEN != '' }} + run: | + echo "${{ env.HW_TOKEN }}" | docker login -u "${{ env.HW_USERNAME }}" --password-stdin swr.cn-southwest-2.myhuaweicloud.com + env: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + + - name: Build image + id: build-image + run: | + TARGET="${{ matrix.target }}" + IMAGE_TAG="swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-${TARGET}" + + echo "Building image: $IMAGE_TAG" + docker build \ + --network host \ + --platform linux/arm64 \ + -f .github/Dockerfile.nightly.${TARGET} \ + --build-arg CANN_VERSION="8.3.rc1" \ + --build-arg UBUNTU_VERSION="22.04" \ + --build-arg PYTHON_VERSION="3.11" \ + -t "$IMAGE_TAG" . 
+ + echo "image-tag=$IMAGE_TAG" >> $GITHUB_OUTPUT + + # To avoid pushing images from forks, only push when the repository owner is 'vllm-project' + - name: Push image to SWR + if: ${{ github.repository_owner == 'vllm-project' && steps.login-swr.conclusion == 'success' }} + run: | + docker push ${{ steps.build-image.outputs.image-tag }} diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index d04c533e..8c1c0997 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -42,9 +42,18 @@ concurrency: cancel-in-progress: true jobs: + image_build: + name: nightly image build + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a2 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} single-node-tests: name: single-node - if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + needs: image_build strategy: fail-fast: false matrix: @@ -63,10 +72,11 @@ jobs: vllm: v0.11.0 runner: ${{ matrix.test_config.os }} tests: ${{ matrix.test_config.tests }} + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2')) }} multi-node-tests: name: multi-node - needs: single-node-tests + needs: [single-node-tests, image_build] if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') strategy: fail-fast: false @@ -83,7 +93,7 @@ jobs: with: soc_version: a2 runner: linux-aarch64-a2-0 - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2')) }} replicas: 
1 size: ${{ matrix.test_config.size }} config_file_path: ${{ matrix.test_config.config_file_path }} diff --git a/.github/workflows/vllm_ascend_test_nightly_a3.yaml b/.github/workflows/vllm_ascend_test_nightly_a3.yaml index a77e1d11..ad0af550 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a3.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a3.yaml @@ -41,9 +41,18 @@ concurrency: cancel-in-progress: true jobs: + image_build: + name: nightly image build + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a3 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} single-node-tests: name: single-node - if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + needs: image_build strategy: fail-fast: false matrix: @@ -94,13 +103,13 @@ jobs: with: vllm: v0.11.0 runner: ${{ matrix.test_config.os }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3')) }} tests: ${{ matrix.test_config.tests }} name: ${{ matrix.test_config.name }} multi-node-tests: name: multi-node - needs: single-node-tests + needs: [single-node-tests, image_build] if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') strategy: fail-fast: false @@ -129,7 +138,7 @@ jobs: with: soc_version: a3 runner: linux-aarch64-a3-0 - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: ${{ fromJSON(format('"{0}"', needs.image_build.outputs.image-tag || 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3')) }} replicas: 1 size: ${{ matrix.test_config.size }} config_file_path: ${{ matrix.test_config.config_file_path }} diff --git 
a/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh b/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh deleted file mode 100644 index 7627cf0c..00000000 --- a/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -set -e -set -o pipefail - -GREEN="\033[0;32m" -BLUE="\033[0;34m" -YELLOW="\033[0;33m" -RED="\033[0;31m" -NC="\033[0m" # No Color - -branch=${1:-v0.3.7.post2} - -repo_url="https://github.com/kvcache-ai/Mooncake" -repo_name="Mooncake" -state_file=".build_state" - -echo "[INFO] Branch: $branch" -echo "-------------------------------------------" - - -mark_done() { echo "$1" >> "$state_file"; } -is_done() { grep -Fxq "$1" "$state_file" 2>/dev/null; } - -if ! is_done "clone"; then - echo "[STEP] Clone repository..." - if [ -d "$repo_name" ]; then - echo "[WARN] Directory $repo_name already exists, skipping clone." - else - git clone --branch "$branch" --depth 1 "$repo_url" "$repo_name" - fi - mark_done "clone" -else - echo "[SKIP] Clone step already done." -fi - -init_ascend_env() { - cann_in_sys_path=/usr/local/Ascend/ascend-toolkit; \ - cann_in_user_path=$HOME/Ascend/ascend-toolkit; \ - uname_m=$(uname -m) && \ - if [ -f "${cann_in_sys_path}/set_env.sh" ]; then \ - source ${cann_in_sys_path}/set_env.sh; \ - export LD_LIBRARY_PATH=${cann_in_sys_path}/latest/lib64:${cann_in_sys_path}/latest/${uname_m}-linux/devlib:${LD_LIBRARY_PATH} ; \ - elif [ -f "${cann_in_user_path}/set_env.sh" ]; then \ - source "$HOME/Ascend/ascend-toolkit/set_env.sh"; \ - export LD_LIBRARY_PATH=${cann_in_user_path}/latest/lib64:${cann_in_user_path}/latest/${uname_m}-linux/devlib:${LD_LIBRARY_PATH}; \ - else \ - echo "No Ascend Toolkit found"; \ - exit 1; \ - fi -} - -init_ascend_env - -if ! is_done "deps"; then - cd "$repo_name" - echo "[STEP]Installing dependencies..." - sed -i 's|https://go.dev/dl/|https://golang.google.cn/dl/|g' dependencies.sh - bash dependencies.sh -y - cd .. 
- mark_done "deps" -else - echo "[SKIP] Dependencies already installed." -fi - - -if ! is_done "mpi"; then - echo "[STEP] Install MPI..." - apt purge -y mpich libmpich-dev openmpi-bin libopenmpi-dev || true - apt install -y mpich libmpich-dev - export CPATH=/usr/lib/aarch64-linux-gnu/mpich/include/:${CPATH:-} - export CPATH=/usr/lib/aarch64-linux-gnu/openmpi/lib:${CPATH:-} - mark_done "mpi" -else - echo "[SKIP] MPI installation already done." -fi - - -if ! is_done "build"; then - echo "[STEP] Compile and install..." - cd "$repo_name" - - if [ -d "build" ]; then - echo "[INFO] Removing existing build directory..." - rm -rf build - fi - - mkdir build && cd build - cmake .. -USE_ASCEND_DIRECT=ON || { echo "[ERROR] cmake failed."; exit 1; } - make -j || { echo "[ERROR] make failed."; exit 1; } - make install || { echo "[ERROR] make install failed."; exit 1; } - mark_done "build" -else - echo "[SKIP] Build already done." -fi - - -if ! grep -q "export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH" ~/.bashrc; then - echo -e "${YELLOW}Adding LD_LIBRARY_PATH to your PATH in ~/.bashrc${NC}" - echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH' >> ~/.bashrc - echo -e "${YELLOW}Please run 'source ~/.bashrc' or start a new terminal${NC}" -fi -export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH - - -echo "==========================================" -echo -e "${GREEN}[SUCCESS] Mooncake build completed!" -echo "You can rerun this script anytime — it will resume from the last step." 
-echo "==========================================" - -echo "Example startup command:" -echo "mooncake_master --eviction_high_watermark_ratio 0.8 --eviction_ratio 0.05 --port 50088" diff --git a/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 b/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 index 712fe3f0..a18d91a4 100644 --- a/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +++ b/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 @@ -15,15 +15,13 @@ spec: spec: containers: - name: vllm-leader - image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11") }} + image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }} env: - name: CONFIG_YAML_PATH value: {{ config_file_path | default("DeepSeek-V3.yaml") }} - name: WORKSPACE - value: "/root/workspace" + value: "/vllm-workspace" # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. - - name: VLLM_VERSION - value: "v0.11.0" - name: VLLM_ASCEND_VERSION value: {{ vllm_ascend_ref | default("main") }} - name: VLLM_ASCEND_REMOTE_URL @@ -75,15 +73,13 @@ spec: spec: containers: - name: vllm-worker - image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11") }} + image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }} env: - name: CONFIG_YAML_PATH value: {{ config_file_path | default("DeepSeek-V3.yaml") }} - name: WORKSPACE - value: "/root/workspace" + value: "/vllm-workspace" # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. 
- - name: VLLM_VERSION - value: "v0.11.0" - name: VLLM_ASCEND_VERSION value: {{ vllm_ascend_ref | default("main") }} - name: VLLM_ASCEND_REMOTE_URL diff --git a/tests/e2e/nightly/multi_node/scripts/run.sh b/tests/e2e/nightly/multi_node/scripts/run.sh index b55ce8d9..080e0ea8 100644 --- a/tests/e2e/nightly/multi_node/scripts/run.sh +++ b/tests/e2e/nightly/multi_node/scripts/run.sh @@ -11,8 +11,8 @@ NC="\033[0m" # No Color # Configuration LOG_DIR="/root/.cache/tests/logs" OVERWRITE_LOGS=true -SRC_DIR="$WORKSPACE/source_code" export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$LD_LIBRARY_PATH +export BENCHMARK_HOME=${WORKSPACE}/vllm-ascend/benchmark # Function to print section headers print_section() { @@ -35,18 +35,50 @@ print_error() { exit 1 } -# Function to check command success -check_success() { - if [ $? -ne 0 ]; then - print_error "$1" +show_vllm_info() { + cd "$WORKSPACE" + echo "Installed vLLM-related Python packages:" + pip list | grep vllm || echo "No vllm packages found." + + echo "" + echo "============================" + echo "vLLM Git information" + echo "============================" + cd vllm + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm" fi + cd .. 
+ + echo "" + echo "============================" + echo "vLLM-Ascend Git information" + echo "============================" + cd vllm-ascend + if [ -d .git ]; then + echo "Branch: $(git rev-parse --abbrev-ref HEAD)" + echo "Commit hash: $(git rev-parse HEAD)" + echo "Author: $(git log -1 --pretty=format:'%an <%ae>')" + echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)" + echo "Message: $(git log -1 --pretty=format:'%s')" + echo "Tags: $(git tag --points-at HEAD || echo 'None')" + echo "Remote: $(git remote -v | head -n1)" + echo "" + else + echo "No .git directory found in vllm-ascend" + fi + cd .. } -if [ $(id -u) -ne 0 ]; then - print_error "Require root permission, try sudo ./dependencies.sh" -fi - - check_npu_info() { echo "====> Check NPU info" npu-smi info @@ -60,79 +92,6 @@ check_and_config() { export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi } -checkout_src() { - echo "====> Checkout source code" - mkdir -p "$SRC_DIR" - - # vllm-ascend - if [ ! -d "$SRC_DIR/vllm-ascend" ]; then - git clone --depth 1 -b $VLLM_ASCEND_VERSION $VLLM_ASCEND_REMOTE_URL "$SRC_DIR/vllm-ascend" - fi - - # vllm - if [ ! -d "$SRC_DIR/vllm" ]; then - git clone -b $VLLM_VERSION https://github.com/vllm-project/vllm.git "$SRC_DIR/vllm" - fi -} - -install_sys_dependencies() { - echo "====> Install system dependencies" - apt-get update -y - - DEP_LIST=() - while IFS= read -r line; do - [[ -n "$line" && ! 
"$line" =~ ^# ]] && DEP_LIST+=("$line") - done < "$SRC_DIR/vllm-ascend/packages.txt" - - apt-get install -y "${DEP_LIST[@]}" gcc g++ cmake libnuma-dev iproute2 -} - -install_vllm() { - echo "====> Install vllm and vllm-ascend" - VLLM_TARGET_DEVICE=empty pip install -e "$SRC_DIR/vllm" - pip install -e "$SRC_DIR/vllm-ascend" - pip install modelscope - # Install for pytest - pip install -r "$SRC_DIR/vllm-ascend/requirements-dev.txt" -} - -install_ais_bench() { - local AIS_BENCH="$SRC_DIR/vllm-ascend/benchmark" - git clone https://gitee.com/aisbench/benchmark.git $AIS_BENCH - cd $AIS_BENCH - git checkout v3.0-20250930-master - pip3 install -e ./ - pip3 install -r requirements/api.txt - pip3 install -r requirements/extra.txt - cd - -} - -install_extra_components() { - echo "====> Installing extra components for DeepSeek-v3.2-exp-bf16" - - if ! wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run; then - echo "Failed to download CANN-custom_ops-sfa-linux.aarch64.run" - return 1 - fi - chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run - ./CANN-custom_ops-sfa-linux.aarch64.run --quiet - - if ! 
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl; then - echo "Failed to download custom_ops wheel" - return 1 - fi - pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl - -export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH} -export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH} -source /usr/local/Ascend/ascend-toolkit/set_env.sh -EOF - - rm -f CANN-custom_ops-sfa-linux.aarch64.run \ - custom_ops-1.0-cp311-cp311-linux_aarch64.whl - echo "====> Extra components installation completed" -} - kill_npu_processes() { pgrep python3 | xargs -r kill -9 pgrep VLLM | xargs -r kill -9 @@ -163,17 +122,9 @@ run_tests_with_log() { main() { check_npu_info check_and_config - checkout_src - install_sys_dependencies - install_vllm - if [[ "$CONFIG_YAML_PATH" == *"DeepSeek-V3_2-Exp-bf16.yaml" ]]; then - install_extra_components - fi - install_ais_bench - cd "$WORKSPACE/source_code" - . 
$SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh - cd "$WORKSPACE/source_code/vllm-ascend" + show_vllm_info + cd "$WORKSPACE/vllm-ascend" run_tests_with_log } -main "$@" \ No newline at end of file +main "$@" diff --git a/tools/aisbench.py b/tools/aisbench.py index 14f1468e..9f37f126 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -28,9 +28,13 @@ import huggingface_hub import pandas as pd from modelscope import snapshot_download # type: ignore -DATASET_CONF_DIR = "benchmark/ais_bench/benchmark/configs/datasets" -REQUEST_CONF_DIR = "benchmark/ais_bench/benchmark/configs/models/vllm_api" -DATASET_DIR = "benchmark/ais_bench/datasets" +# AISBench checkout root; default keeps the old ./benchmark layout. +BENCHMARK_HOME = os.getenv("BENCHMARK_HOME", os.path.abspath("benchmark")) +DATASET_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", + "configs", "datasets") +REQUEST_CONF_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "benchmark", + "configs", "models", "vllm_api") +DATASET_DIR = os.path.join(BENCHMARK_HOME, "ais_bench", "datasets") class AisbenchRunner: