From 84b00695f81c381ddcfd58a00cf45576b9244771 Mon Sep 17 00:00:00 2001 From: wjunLu <135617475+wjunLu@users.noreply.github.com> Date: Sat, 28 Feb 2026 09:06:00 +0800 Subject: [PATCH] [CI] Refactor to speedup image building and CI Installation (#6708) ### What this PR does / why we need it? 1. Refactor image workflow using cache-from to speed up builds ![build](https://github.com/user-attachments/assets/02135c12-0069-44f8-a3ec-5c2b4282448a) Simultaneously refactored all Dockerfiles by placing layers that rarely change before those that change frequently, improving build cache hit rate. 2. Refactor E2E test to use vllm-ascend container images, to skip the C compile when no C code is changed ![e2e](https://github.com/user-attachments/assets/49f5b166-0df3-41e1-8f71-b3bbbed17cfd) In this case, the job will only replace the source code of vllm-ascend and install `requirements-dev.txt`, saving about 10 minutes before tests ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? 
- vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 Signed-off-by: wjunLu --- .github/workflows/_e2e_test.yaml | 382 ++++++++++++++---- .github/workflows/_schedule_image_build.yaml | 23 ++ .github/workflows/pr_test_full.yaml | 2 +- .github/workflows/pr_test_light.yaml | 2 +- .../schedule_image_build_and_push.yaml | 9 +- .../schedule_nightly_image_build.yaml | 1 + .../workflows/schedule_test_vllm_main.yaml | 2 +- Dockerfile | 50 +-- Dockerfile.310p | 36 +- Dockerfile.310p.openEuler | 30 +- Dockerfile.a3 | 50 ++- Dockerfile.a3.openEuler | 40 +- Dockerfile.openEuler | 42 +- 13 files changed, 456 insertions(+), 213 deletions(-) diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index da0849b5..15690750 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -17,16 +17,71 @@ on: type: boolean jobs: + select-image: + runs-on: linux-aarch64-a2b3-0 + outputs: + image: ${{ steps.select.outputs.image }} + image_a3: ${{ steps.select.outputs.image_a3 }} + image_310p: ${{ steps.select.outputs.image_310p }} + steps: + - name: Select image based on base branch + id: select + env: + IMAGE_NAMESPACE: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend + run: | + BRANCH="${{ github.base_ref }}" + BRANCH_TAG="${BRANCH//\//-}" + MAIN_IMAGE="${{ inputs.image }}" + MAIN_IMAGE_A3="${{ inputs.image }}-a3" + MAIN_IMAGE_310P="${{ inputs.image }}-310p" + if [ "$BRANCH_TAG" = "main" ]; then + echo "Target branch is main, using main images: ${MAIN_IMAGE} / ${MAIN_IMAGE_A3} / ${MAIN_IMAGE_310P}" + echo "image=${MAIN_IMAGE}" >> $GITHUB_OUTPUT + echo "image_a3=${MAIN_IMAGE_A3}" >> $GITHUB_OUTPUT + echo "image_310p=${MAIN_IMAGE_310P}" >> $GITHUB_OUTPUT + exit 0 + fi + + # while target branch is not main + BRANCH_IMAGE="${IMAGE_NAMESPACE}:${BRANCH_TAG}" + BRANCH_IMAGE_A3="${IMAGE_NAMESPACE}:${BRANCH_TAG}-a3" + 
BRANCH_IMAGE_310P="${IMAGE_NAMESPACE}:${BRANCH_TAG}-310p" + # Check if branch-specific A2 image exists via IMAGE_NAMESPACE, fallback to main if not + if docker manifest inspect "${BRANCH_IMAGE}" > /dev/null 2>&1; then + echo "Using branch image: ${BRANCH_IMAGE}" + echo "image=${BRANCH_IMAGE}" >> $GITHUB_OUTPUT + else + echo "Branch image not found, falling back to ${MAIN_IMAGE}" + echo "image=${MAIN_IMAGE}" >> $GITHUB_OUTPUT + fi + # Check if branch-specific A3 image exists via IMAGE_NAMESPACE, fallback to main if not + if docker manifest inspect "${BRANCH_IMAGE_A3}" > /dev/null 2>&1; then + echo "Using branch A3 image: ${BRANCH_IMAGE_A3}" + echo "image_a3=${BRANCH_IMAGE_A3}" >> $GITHUB_OUTPUT + else + echo "Branch A3 image not found, falling back to ${MAIN_IMAGE_A3}" + echo "image_a3=${MAIN_IMAGE_A3}" >> $GITHUB_OUTPUT + fi + # Check if branch-specific 310P image exists via IMAGE_NAMESPACE, fallback to main if not + if docker manifest inspect "${BRANCH_IMAGE_310P}" > /dev/null 2>&1; then + echo "Using branch 310P image: ${BRANCH_IMAGE_310P}" + echo "image_310p=${BRANCH_IMAGE_310P}" >> $GITHUB_OUTPUT + else + echo "Branch 310P image not found, falling back to ${MAIN_IMAGE_310P}" + echo "image_310p=${MAIN_IMAGE_310P}" >> $GITHUB_OUTPUT + fi + e2e-light: name: singlecard-light if: ${{ inputs.type == 'light' }} + needs: [select-image] runs-on: linux-aarch64-a2b3-1 strategy: fail-fast: false matrix: part: [0] container: - image: ${{ inputs.image }} + image: ${{ needs.select-image.outputs.image }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True @@ -34,6 +89,8 @@ jobs: steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Check npu and CANN info run: | npu-smi info @@ -46,14 +103,11 @@ jobs: pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system 
dependencies run: | apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev clang-15 - - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -63,19 +117,45 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . + pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . + DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." + pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." 
+ fi - name: Run vllm-project/vllm-ascend test + working-directory: /vllm-workspace/vllm-ascend env: PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 VLLM_WORKER_MULTIPROC_METHOD: spawn @@ -85,13 +165,14 @@ jobs: e2e-full: name: singlecard-full if: ${{ inputs.type == 'full' }} + needs: [select-image] runs-on: linux-aarch64-a2b3-1 strategy: fail-fast: false matrix: part: [0, 1] container: - image: ${{ inputs.image }} + image: ${{ needs.select-image.outputs.image }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True @@ -100,6 +181,8 @@ jobs: steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Check npu and CANN info run: | @@ -113,14 +196,11 @@ jobs: pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system dependencies run: | apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev clang-15 - - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -130,18 +210,44 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . + pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . 
+ DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." + pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." + fi + - name: Run e2e test + working-directory: /vllm-workspace/vllm-ascend env: VLLM_WORKER_MULTIPROC_METHOD: spawn PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 @@ -151,13 +257,14 @@ jobs: e2e-2-cards-light: name: multicard-2-light if: ${{ inputs.type == 'light' }} + needs: [select-image] runs-on: linux-aarch64-a3-2 strategy: fail-fast: false matrix: part: [0] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 + image: ${{ needs.select-image.outputs.image_a3 }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True @@ -166,6 +273,8 @@ jobs: steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Check npu and CANN info run: | npu-smi info @@ -178,14 +287,11 @@ jobs: pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system dependencies run: | 
apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev clang-15 - - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -195,18 +301,44 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . + pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . + DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." + pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." 
+ fi + - name: Run vllm-project/vllm-ascend test (light) + working-directory: /vllm-workspace/vllm-ascend env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | @@ -215,13 +347,14 @@ jobs: e2e-2-cards-full: name: multicard-2-full if: ${{ inputs.type == 'full' }} + needs: [select-image] runs-on: linux-aarch64-a3-2 strategy: fail-fast: false matrix: part: [0] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 + image: ${{ needs.select-image.outputs.image_a3 }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True @@ -230,6 +363,8 @@ jobs: steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Check npu and CANN info run: | npu-smi info @@ -242,14 +377,11 @@ jobs: pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system dependencies run: | apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev clang-15 - - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -259,18 +391,44 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . + pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . 
+ DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." + pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." + fi + - name: Run vllm-project/vllm-ascend test (full) + working-directory: /vllm-workspace/vllm-ascend env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | @@ -278,6 +436,7 @@ jobs: - name: Run vllm-project/vllm-ascend test (non triton) if: ${{ inputs.type == 'full' && matrix.part == 0 }} + working-directory: /vllm-workspace/vllm-ascend env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | @@ -287,13 +446,14 @@ jobs: e2e-4-cards-full: name: multicard-4-full if: ${{ inputs.type == 'full' }} + needs: [select-image] runs-on: linux-aarch64-a3-4 strategy: fail-fast: false matrix: part: [0] container: - image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11 + image: ${{ needs.select-image.outputs.image_a3 }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True @@ -301,6 +461,8 @@ jobs: steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Check npu and CANN info run: | npu-smi info @@ -313,14 +475,11 @@ jobs: pip config set global.trusted-host 
cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system dependencies run: | apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev clang-15 - - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -330,19 +489,44 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . + pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . + DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." 
+ pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." + fi - name: Run vllm-project/vllm-ascend test for V1 Engine + working-directory: /vllm-workspace/vllm-ascend env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | @@ -352,17 +536,24 @@ jobs: name: 310p singlecard runs-on: linux-aarch64-310p-1 if: ${{ inputs.contains_310 }} + needs: [select-image] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11 + image: ${{ needs.select-image.outputs.image_310p }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True HF_HUB_OFFLINE: 1 steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Check npu and CANN info run: | npu-smi info cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + - name: Config mirrors run: | sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list @@ -370,14 +561,11 @@ jobs: pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v6 + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system dependencies run: | apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -387,19 +575,44 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . 
+ pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . + DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." + pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." 
+ fi - name: Run vllm-project/vllm-ascend test + working-directory: /vllm-workspace/vllm-ascend env: PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 VLLM_WORKER_MULTIPROC_METHOD: spawn @@ -410,17 +623,24 @@ jobs: name: 310p multicards 4cards runs-on: linux-aarch64-310p-4 if: ${{ inputs.contains_310 }} + needs: [select-image] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11 + image: ${{ needs.select-image.outputs.image_310p }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True HF_HUB_OFFLINE: 1 steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Check npu and CANN info run: | npu-smi info cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + - name: Config mirrors run: | sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list @@ -428,14 +648,11 @@ jobs: pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local apt-get update -y apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v6 + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Install system dependencies run: | apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev - name: Checkout vllm-project/vllm repo uses: actions/checkout@v6 @@ -445,19 +662,44 @@ jobs: path: ./vllm-empty fetch-depth: 1 - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty + - name: Install vllm-project/vllm run: | - VLLM_TARGET_DEVICE=empty pip install -e . 
+ pip uninstall -y vllm + rm -rf /vllm-workspace/vllm + cp -r ./vllm-empty /vllm-workspace/vllm + VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/ - name: Install vllm-project/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi run: | - pip install -r requirements-dev.txt - pip install -v -e . + DEST="/vllm-workspace/vllm-ascend" + # rebase commits + git config user.email "action@github.com" + git config user.name "GitHub Action" + git fetch origin ${{ github.base_ref }} + git rebase origin/${{ github.base_ref }} + + IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "") + cp -rT . "${DEST}/" + if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then + C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \ + csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt) + echo "[debug] C_CHANGES=${C_CHANGES:-}" + else + echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall" + C_CHANGES="yes" + fi + pip install -r ${DEST}/requirements-dev.txt + if [ -n "$C_CHANGES" ]; then + echo "[debug] C code / build changes detected, reinstalling vllm-ascend..." + pip install -v -e "${DEST}/" + else + echo "[debug] No C code / build changes detected, skipping reinstall." + fi - name: Run vllm-project/vllm-ascend test + working-directory: /vllm-workspace/vllm-ascend env: PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 VLLM_WORKER_MULTIPROC_METHOD: spawn diff --git a/.github/workflows/_schedule_image_build.yaml b/.github/workflows/_schedule_image_build.yaml index cfa6517a..dc606f7c 100644 --- a/.github/workflows/_schedule_image_build.yaml +++ b/.github/workflows/_schedule_image_build.yaml @@ -76,6 +76,26 @@ jobs: driver: docker-container use: true + - name: Set cache ref + id: cache + run: | + if [ "${{ github.ref_type }}" = "tag" ]; then + # For tag events, use the images built from source branch as cache (the tag image doesn't exist yet). 
+ if [ -z "$branch" ]; then + branch=$(git branch -r --contains HEAD \ + | grep -v 'HEAD' \ + | sed 's|[[:space:]]*origin/||' \ + | head -1) + fi + branch="${branch:-main}" + else + # For branch push / schedule / workflow_dispatch, use the triggering branch name + branch="${{ github.ref_name }}" + fi + # Replace / with - for use in image tags + branch="${branch//\//-}" + echo "ref=quay.io/ascend/vllm-ascend:${branch}-${{ inputs.suffix }}" >> $GITHUB_OUTPUT + - name: Build and push uses: docker/build-push-action@v6 id: build @@ -89,6 +109,8 @@ jobs: outputs: type=image,name=quay.io/ascend/vllm-ascend,push-by-digest=true,name-canonical=true,push=${{ inputs.should_push }} build-args: | PIP_INDEX_URL=https://pypi.org/simple + # use previously pushed multi-arch image as cache to speed up builds + cache-from: type=registry,ref=${{ steps.cache.outputs.ref }} provenance: false - name: Export digest @@ -154,6 +176,7 @@ jobs: # which follow the rule from vLLM with prefix v # TODO(yikun): the post release might be considered as latest release tags: | + type=branch,suffix=${{ env.SUFFIX }} type=pep440,pattern={{raw}},suffix=${{ env.SUFFIX }} type=schedule,pattern=main,suffix=${{ env.SUFFIX }} type=raw,value=${{ inputs.workflow_dispatch_tag }},enable=${{ github.event_name == 'workflow_dispatch' }},suffix=${{ env.SUFFIX }} diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml index 925106c9..9a40f5e3 100644 --- a/.github/workflows/pr_test_full.yaml +++ b/.github/workflows/pr_test_full.yaml @@ -81,6 +81,6 @@ jobs: uses: ./.github/workflows/_e2e_test.yaml with: vllm: ${{ matrix.vllm_version }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main contains_310: false type: full diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml index 938430fc..94393cf0 100644 --- 
a/.github/workflows/pr_test_light.yaml +++ b/.github/workflows/pr_test_light.yaml @@ -107,6 +107,6 @@ jobs: uses: ./.github/workflows/_e2e_test.yaml with: vllm: ${{ matrix.vllm_version }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }} type: light diff --git a/.github/workflows/schedule_image_build_and_push.yaml b/.github/workflows/schedule_image_build_and_push.yaml index c0dbf8d1..d751c59a 100644 --- a/.github/workflows/schedule_image_build_and_push.yaml +++ b/.github/workflows/schedule_image_build_and_push.yaml @@ -12,12 +12,15 @@ name: Image Build and Push on: schedule: - # UTC+8: 8am, 12pm, 16pm, 22pm - - cron: '0 0,4,8,14 * * *' + # UTC+8: 8am, 10am, 12pm, 14pm, 16pm, 18pm, 22pm + - cron: '0 0,2,4,6,8,10,14 * * *' push: + branches: + # Build release branch images proactively so cache is warm when the tag is pushed + - 'releases/*' tags: - 'v*' - pull_request: + pull_request: branches: - 'main' types: [ labeled, synchronize ] diff --git a/.github/workflows/schedule_nightly_image_build.yaml b/.github/workflows/schedule_nightly_image_build.yaml index aaf56074..09e93fb5 100644 --- a/.github/workflows/schedule_nightly_image_build.yaml +++ b/.github/workflows/schedule_nightly_image_build.yaml @@ -50,6 +50,7 @@ jobs: --build-arg CANN_VERSION="8.5.0" \ --build-arg UBUNTU_VERSION="22.04" \ --build-arg PYTHON_VERSION="3.11" \ + --cache-from "$IMAGE_TAG" \ -t "$IMAGE_TAG" . 
echo "image-tag=$IMAGE_TAG" >> $GITHUB_OUTPUT diff --git a/.github/workflows/schedule_test_vllm_main.yaml b/.github/workflows/schedule_test_vllm_main.yaml index b60fbb0c..baaa4c94 100644 --- a/.github/workflows/schedule_test_vllm_main.yaml +++ b/.github/workflows/schedule_test_vllm_main.yaml @@ -34,6 +34,6 @@ jobs: uses: ./.github/workflows/_e2e_test.yaml with: vllm: main - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main contains_310: false type: full diff --git a/Dockerfile b/Dockerfile index b04ae9e1..a9a58350 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,46 +19,49 @@ FROM quay.io/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG MOONCAKE_TAG="v0.3.8.post1" -ARG SOC_VERSION="ascend910b1" - -# Define environments -ENV DEBIAN_FRONTEND=noninteractive -ENV SOC_VERSION=$SOC_VERSION \ - TASK_QUEUE_ENABLE=1 \ - OMP_NUM_THREADS=1 WORKDIR /workspace -COPY . 
/vllm-workspace/vllm-ascend/ +COPY ./tools/mooncake_installer.sh /vllm-workspace/ -# Install Mooncake dependencies +# Install clang-15 (for triton-ascend) and Mooncake RUN apt-get update -y && \ - apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \ + apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 clang-15 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \ - cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ + mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \ ARCH=$(uname -m) && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \ make -j$(nproc) && make install && \ - rm -fr /vllm-workspace/Mooncake/build && \ + rm -rf /vllm-workspace/Mooncake/build && \ rm -rf /var/cache/apt/* && \ rm -rf /var/lib/apt/lists/* -RUN pip config set global.index-url ${PIP_INDEX_URL} +# Install modelscope (for fast download) and ray (for multinode) +RUN pip config set global.index-url ${PIP_INDEX_URL} && \ + python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ + python3 -m pip cache purge # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.16.0 -RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm -# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. 
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \ + VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip uninstall -y triton && \ python3 -m pip cache purge # Install vllm-ascend -# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH +ARG SOC_VERSION="ascend910b1" +ENV DEBIAN_FRONTEND=noninteractive +ENV SOC_VERSION=$SOC_VERSION \ + TASK_QUEUE_ENABLE=1 \ + OMP_NUM_THREADS=1 +COPY . /vllm-workspace/vllm-ascend/ + RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ @@ -66,18 +69,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge -# Install clang-15 (for triton-ascend) -RUN apt-get update -y && \ - apt-get -y install clang-15 && \ - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ - rm -rf /var/cache/apt/* && \ - rm -rf /var/lib/apt/lists/* - -# Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ - python3 -m pip cache purge - +# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc CMD ["/bin/bash"] diff --git a/Dockerfile.310p b/Dockerfile.310p index ec5772a2..0122ba5b 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -18,37 +18,36 @@ FROM 
quay.io/ascend/cann:8.5.0-310p-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" -ARG SOC_VERSION="ascend310p1" -# Define environments -ENV DEBIAN_FRONTEND=noninteractive -ENV SOC_VERSION=$SOC_VERSION \ - TASK_QUEUE_ENABLE=1 \ - OMP_NUM_THREADS=1 - +WORKDIR /workspace RUN apt-get update -y && \ apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \ rm -rf /var/cache/apt/* && \ rm -rf /var/lib/apt/lists/* -WORKDIR /workspace - -COPY . /vllm-workspace/vllm-ascend/ - -RUN pip config set global.index-url ${PIP_INDEX_URL} +# Install modelscope (for fast download) and ray (for multinode) +RUN pip config set global.index-url ${PIP_INDEX_URL} && \ + python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ + python3 -m pip cache purge # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.16.0 -RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm -# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. -RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \ + # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. + VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip uninstall -y triton && \ python3 -m pip cache purge # Install vllm-ascend -# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH +ARG SOC_VERSION="ascend310p1" +ENV DEBIAN_FRONTEND=noninteractive +ENV SOC_VERSION=$SOC_VERSION \ + TASK_QUEUE_ENABLE=1 \ + OMP_NUM_THREADS=1 +COPY . 
/vllm-workspace/vllm-ascend/ + RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ @@ -56,10 +55,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge -# Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ - python3 -m pip cache purge - +# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc CMD ["/bin/bash"] diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index 92f94199..174c47dd 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -18,32 +18,34 @@ FROM quay.io/ascend/cann:8.5.0-310p-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" -ARG SOC_VERSION="ascend310p1" -ENV SOC_VERSION=$SOC_VERSION \ - TASK_QUEUE_ENABLE=1 \ - OMP_NUM_THREADS=1 +WORKDIR /workspace RUN yum update -y && \ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \ rm -rf /var/cache/yum -RUN pip config set global.index-url ${PIP_INDEX_URL} - -WORKDIR /workspace - -COPY . /vllm-workspace/vllm-ascend/ +# Install modelscope (for fast download) and ray (for multinode) +RUN pip config set global.index-url ${PIP_INDEX_URL} && \ + python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ + python3 -m pip cache purge # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.16.0 -RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm -# In x86, triton will be installed by vllm. 
But in Ascend, triton doesn't work correctly. we need to uninstall it. -RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \ + # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. + VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip uninstall -y triton && \ python3 -m pip cache purge # Install vllm-ascend +ARG SOC_VERSION="ascend310p1" +ENV SOC_VERSION=$SOC_VERSION \ + TASK_QUEUE_ENABLE=1 \ + OMP_NUM_THREADS=1 +COPY . /vllm-workspace/vllm-ascend/ + RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ @@ -52,10 +54,6 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge -# Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ - python3 -m pip cache purge - RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc CMD ["/bin/bash"] diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 17bd077b..5b68ae0c 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -19,24 +19,20 @@ FROM quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG MOONCAKE_TAG=v0.3.8.post1 -ARG SOC_VERSION="ascend910_9391" -COPY . 
/vllm-workspace/vllm-ascend/ -# Define environments +COPY ./tools/mooncake_installer.sh /vllm-workspace/ + ENV DEBIAN_FRONTEND=noninteractive -ENV SOC_VERSION=$SOC_VERSION \ - TASK_QUEUE_ENABLE=1 \ - OMP_NUM_THREADS=1 - -RUN pip config set global.index-url ${PIP_INDEX_URL} WORKDIR /workspace -# Install Mooncake dependencies +# Install clang-15 (for triton-ascend) and Mooncake RUN apt-get update -y && \ - apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \ + apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 clang-15 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \ - cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ + mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \ ARCH=$(uname -m) && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ @@ -47,17 +43,28 @@ RUN apt-get update -y && \ rm -rf /var/cache/apt/* && \ rm -rf /var/lib/apt/lists/* +# Install modelscope (for fast download) and ray (for multinode) +RUN pip config set global.index-url ${PIP_INDEX_URL} && \ + python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ + python3 -m pip cache purge + # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.16.0 -RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm -# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. 
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \ + # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. + VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip uninstall -y triton && \ python3 -m pip cache purge # Install vllm-ascend -# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH +ARG SOC_VERSION="ascend910_9391" +ENV DEBIAN_FRONTEND=noninteractive +ENV SOC_VERSION=$SOC_VERSION \ + TASK_QUEUE_ENABLE=1 \ + OMP_NUM_THREADS=1 +COPY . /vllm-workspace/vllm-ascend/ + RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ @@ -65,18 +72,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge -# Install clang-15 (for triton-ascend) -RUN apt-get update -y && \ - apt-get -y install clang-15 && \ - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \ - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \ - rm -rf /var/cache/apt/* && \ - rm -rf /var/lib/apt/lists/* - -# Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ - python3 -m pip cache purge - +# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc CMD ["/bin/bash"] diff --git a/Dockerfile.a3.openEuler 
b/Dockerfile.a3.openEuler index 4e5b3838..75ef096d 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -19,24 +19,18 @@ FROM quay.io/ascend/cann:8.5.0-a3-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG MOONCAKE_TAG="v0.3.8.post1" -ARG SOC_VERSION="ascend910_9391" - -ENV SOC_VERSION=$SOC_VERSION \ - TASK_QUEUE_ENABLE=1 \ - OMP_NUM_THREADS=1 - -RUN pip config set global.index-url ${PIP_INDEX_URL} WORKDIR /workspace -COPY . /vllm-workspace/vllm-ascend/ +COPY ./tools/mooncake_installer.sh /vllm-workspace/ SHELL ["/bin/bash", "-c"] +# Install clang (for triton-ascend) and Mooncake RUN yum update -y && \ - yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \ + yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc clang && \ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \ - cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ + cp /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ ARCH=$(uname -m) && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \ @@ -48,16 +42,27 @@ RUN yum update -y && \ rm -fr /vllm-workspace/Mooncake/build && \ rm -rf /var/cache/yum/* +# Install modelscope (for fast download) and ray (for multinode) +RUN pip config set global.index-url ${PIP_INDEX_URL} && \ + python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ + python3 -m pip cache purge + # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.16.0 -RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm -# In x86, triton will be installed by vllm.
But in Ascend, triton doesn't work correctly. we need to uninstall it. -RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \ + # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. + VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip uninstall -y triton && \ python3 -m pip cache purge # Install vllm-ascend +ARG SOC_VERSION="ascend910_9391" +ENV SOC_VERSION=$SOC_VERSION \ + TASK_QUEUE_ENABLE=1 \ + OMP_NUM_THREADS=1 +COPY . /vllm-workspace/vllm-ascend/ + RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ @@ -66,15 +71,6 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge -# Install clang (for triton-ascend) -RUN yum update -y && \ - yum install -y clang && \ - rm -rf /var/cache/yum/* - -# Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ - python3 -m pip cache purge - RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc CMD ["/bin/bash"] diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 825b8a24..2f2fe086 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -18,25 +18,19 @@ FROM quay.io/ascend/cann:8.5.0-910b-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG MOONCAKE_TAG="v0.3.8.post1" -ARG SOC_VERSION="ascend910b1" - -ENV
SOC_VERSION=$SOC_VERSION \ - TASK_QUEUE_ENABLE=1 \ - OMP_NUM_THREADS=1 - -RUN pip config set global.index-url ${PIP_INDEX_URL} WORKDIR /workspace -COPY . /vllm-workspace/vllm-ascend/ +COPY ./tools/mooncake_installer.sh /vllm-workspace/ SHELL ["/bin/bash", "-c"] +# Install clang (for triton-ascend) and Mooncake RUN yum update -y && \ - yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \ + yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc clang && \ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \ - cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ + mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \ ARCH=$(uname -m) && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \ @@ -48,16 +41,28 @@ RUN yum update -y && \ rm -fr /vllm-workspace/Mooncake/build && \ rm -rf /var/cache/yum/* +# Install modelscope (for fast download) and ray (for multinode) +RUN pip config set global.index-url ${PIP_INDEX_URL} && \ + python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ + python3 -m pip cache purge + # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_TAG=v0.16.0 -RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm -# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. -RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \ + # In x86, triton will be installed by vllm.
But in Ascend, triton doesn't work correctly. we need to uninstall it. + VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip uninstall -y triton && \ python3 -m pip cache purge # Install vllm-ascend +ARG MOONCAKE_TAG="v0.3.8.post1" +ARG SOC_VERSION="ascend910b1" +ENV SOC_VERSION=$SOC_VERSION \ + TASK_QUEUE_ENABLE=1 \ + OMP_NUM_THREADS=1 +COPY . /vllm-workspace/vllm-ascend/ + RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ @@ -66,15 +71,6 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge -# Install clang (for triton-ascend) -RUN yum update -y && \ - yum install -y clang && \ - rm -rf /var/cache/yum/* - -# Install modelscope (for fast download) and ray (for multinode) -RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ - python3 -m pip cache purge - RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc CMD ["/bin/bash"]