diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 521e0d62..da0849b5 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -17,71 +17,16 @@ on:
         type: boolean

 jobs:
-  select-image:
-    runs-on: linux-aarch64-a2b3-0
-    outputs:
-      image: ${{ steps.select.outputs.image }}
-      image_a3: ${{ steps.select.outputs.image_a3 }}
-      image_310p: ${{ steps.select.outputs.image_310p }}
-    steps:
-      - name: Select image based on base branch
-        id: select
-        env:
-          IMAGE_NAMESPACE: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend
-        run: |
-          BRANCH="${{ github.base_ref }}"
-          BRANCH_TAG="${BRANCH//\//-}"
-          MAIN_IMAGE="${{ inputs.image }}"
-          MAIN_IMAGE_A3="${{ inputs.image }}-a3"
-          MAIN_IMAGE_310P="${{ inputs.image }}-310p"
-          if [ "$BRANCH_TAG" = "main" ]; then
-            echo "Target branch is main, using main images: ${MAIN_IMAGE} / ${MAIN_IMAGE_A3} / ${MAIN_IMAGE_310P}"
-            echo "image=${MAIN_IMAGE}" >> $GITHUB_OUTPUT
-            echo "image_a3=${MAIN_IMAGE_A3}" >> $GITHUB_OUTPUT
-            echo "image_310p=${MAIN_IMAGE_310P}" >> $GITHUB_OUTPUT
-            exit 0
-          fi
-
-          # while target branch is not main
-          BRANCH_IMAGE="${IMAGE_NAMESPACE}:${BRANCH_TAG}"
-          BRANCH_IMAGE_A3="${IMAGE_NAMESPACE}:${BRANCH_TAG}-a3"
-          BRANCH_IMAGE_310P="${IMAGE_NAMESPACE}:${BRANCH_TAG}-310p"
-          # Check if branch-specific A2 image exists via IMAGE_NAMESPACE, fallback to main if not
-          if docker manifest inspect "${BRANCH_IMAGE}" > /dev/null 2>&1; then
-            echo "Using branch image: ${BRANCH_IMAGE}"
-            echo "image=${BRANCH_IMAGE}" >> $GITHUB_OUTPUT
-          else
-            echo "Branch image not found, falling back to ${MAIN_IMAGE}"
-            echo "image=${MAIN_IMAGE}" >> $GITHUB_OUTPUT
-          fi
-          # Check if branch-specific A3 image exists via IMAGE_NAMESPACE, fallback to main if not
-          if docker manifest inspect "${BRANCH_IMAGE_A3}" > /dev/null 2>&1; then
-            echo "Using branch A3 image: ${BRANCH_IMAGE_A3}"
-            echo "image_a3=${BRANCH_IMAGE_A3}" >> $GITHUB_OUTPUT
-          else
-            echo "Branch A3 image not found, falling back to ${MAIN_IMAGE_A3}"
-            echo "image_a3=${MAIN_IMAGE_A3}" >> $GITHUB_OUTPUT
-          fi
-          # Check if branch-specific 310P image exists via IMAGE_NAMESPACE, fallback to main if not
-          if docker manifest inspect "${BRANCH_IMAGE_310P}" > /dev/null 2>&1; then
-            echo "Using branch 310P image: ${BRANCH_IMAGE_310P}"
-            echo "image_310p=${BRANCH_IMAGE_310P}" >> $GITHUB_OUTPUT
-          else
-            echo "Branch 310P image not found, falling back to ${MAIN_IMAGE_310P}"
-            echo "image_310p=${MAIN_IMAGE_310P}" >> $GITHUB_OUTPUT
-          fi
-
   e2e-light:
     name: singlecard-light
     if: ${{ inputs.type == 'light' }}
-    needs: [select-image]
     runs-on: linux-aarch64-a2b3-1
     strategy:
       fail-fast: false
       matrix:
         part: [0]
     container:
-      image: ${{ needs.select-image.outputs.image }}
+      image: ${{ inputs.image }}
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
@@ -89,8 +34,6 @@ jobs:
     steps:
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
-        with:
-          fetch-depth: 0

       - name: Check npu and CANN info
         run: |
           npu-smi info
@@ -103,11 +46,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev clang-15
+
+          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
+          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -117,39 +63,19 @@ jobs:
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run vllm-project/vllm-ascend test
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
@@ -159,14 +85,13 @@
   e2e-full:
     name: singlecard-full
     if: ${{ inputs.type == 'full' }}
-    needs: [select-image]
     runs-on: linux-aarch64-a2b3-1
     strategy:
       fail-fast: false
       matrix:
         part: [0, 1]
     container:
-      image: ${{ needs.select-image.outputs.image }}
+      image: ${{ inputs.image }}
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
@@ -175,8 +100,6 @@ jobs:
     steps:
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
-        with:
-          fetch-depth: 0

       - name: Check npu and CANN info
         run: |
@@ -190,11 +113,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev clang-15
+
+          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
+          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -204,38 +130,18 @@
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
-
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run e2e test
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
@@ -245,14 +151,13 @@
   e2e-2-cards-light:
     name: multicard-2-light
     if: ${{ inputs.type == 'light' }}
-    needs: [select-image]
     runs-on: linux-aarch64-a3-2
     strategy:
       fail-fast: false
       matrix:
         part: [0]
     container:
-      image: ${{ needs.select-image.outputs.image_a3 }}
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
@@ -261,8 +166,6 @@
     steps:
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
-        with:
-          fetch-depth: 0

       - name: Check npu and CANN info
         run: |
           npu-smi info
@@ -275,11 +178,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev clang-15
+
+          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
+          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -289,38 +195,18 @@
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
-
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run vllm-project/vllm-ascend test (light)
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
@@ -329,14 +215,13 @@
   e2e-2-cards-full:
     name: multicard-2-full
     if: ${{ inputs.type == 'full' }}
-    needs: [select-image]
     runs-on: linux-aarch64-a3-2
     strategy:
       fail-fast: false
       matrix:
         part: [0]
     container:
-      image: ${{ needs.select-image.outputs.image_a3 }}
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
@@ -345,8 +230,6 @@
     steps:
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
-        with:
-          fetch-depth: 0

       - name: Check npu and CANN info
         run: |
           npu-smi info
@@ -359,11 +242,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev clang-15
+
+          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
+          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -373,38 +259,18 @@
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
-
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run vllm-project/vllm-ascend test (full)
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
@@ -412,7 +278,6 @@
       - name: Run vllm-project/vllm-ascend test (non triton)
         if: ${{ inputs.type == 'full' && matrix.part == 0 }}
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
@@ -422,14 +287,13 @@
   e2e-4-cards-full:
     name: multicard-4-full
     if: ${{ inputs.type == 'full' }}
-    needs: [select-image]
     runs-on: linux-aarch64-a3-4
     strategy:
       fail-fast: false
       matrix:
         part: [0]
     container:
-      image: ${{ needs.select-image.outputs.image_a3 }}
+      image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
@@ -437,8 +301,6 @@
     steps:
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v6
-        with:
-          fetch-depth: 0

       - name: Check npu and CANN info
         run: |
           npu-smi info
@@ -451,11 +313,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev clang-15
+
+          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
+          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -465,38 +330,19 @@
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run vllm-project/vllm-ascend test for V1 Engine
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
@@ -506,24 +352,17 @@
     name: 310p singlecard
     runs-on: linux-aarch64-310p-1
     if: ${{ inputs.contains_310 }}
-    needs: [select-image]
     container:
-      image: ${{ needs.select-image.outputs.image_310p }}
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
         HF_HUB_OFFLINE: 1
     steps:
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-
       - name: Check npu and CANN info
         run: |
           npu-smi info
           cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
       - name: Config mirrors
         run: |
           sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
@@ -531,11 +370,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v6

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -545,38 +387,19 @@
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run vllm-project/vllm-ascend test
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
@@ -587,24 +410,17 @@
     name: 310p multicards 4cards
     runs-on: linux-aarch64-310p-4
     if: ${{ inputs.contains_310 }}
-    needs: [select-image]
     container:
-      image: ${{ needs.select-image.outputs.image_310p }}
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
         HF_HUB_OFFLINE: 1
     steps:
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-
       - name: Check npu and CANN info
         run: |
           npu-smi info
           cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
       - name: Config mirrors
         run: |
           sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
@@ -612,11 +428,14 @@
           pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
           apt-get update -y
           apt install git -y
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}"
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v6

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev

       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v6
@@ -626,38 +445,19 @@
           path: ./vllm-empty
           fetch-depth: 1

-      - name: Install vllm-project/vllm
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
         run: |
-          pip uninstall -y vllm
-          rm -rf /vllm-workspace/vllm
-          cp -r ./vllm-empty /vllm-workspace/vllm
-          VLLM_TARGET_DEVICE=empty pip install -v -e /vllm-workspace/vllm/
+          VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
-          DEST="/vllm-workspace/vllm-ascend"
-          IMAGE_SHA=$(git -C "${DEST}" log -1 --format=%H 2>/dev/null || echo "")
-          cp -rT . "${DEST}/"
-          if [ -n "$IMAGE_SHA" ] && git cat-file -e "${IMAGE_SHA}" 2>/dev/null; then
-            C_CHANGES=$(git diff "${IMAGE_SHA}"..HEAD --name-only -- \
-              csrc/ cmake/ CMakeLists.txt setup.py requirements.txt requirements-dev.txt)
-            echo "[debug] C_CHANGES=${C_CHANGES:-}"
-          else
-            echo "[debug] IMAGE_SHA not found in local history (empty or unreachable), forcing reinstall"
-            C_CHANGES="yes"
-          fi
-          pip install -r ${DEST}/requirements-dev.txt
-          if [ -n "$C_CHANGES" ]; then
-            echo "[debug] C code / build changes detected, reinstalling vllm-ascend..."
-            pip install -v -e "${DEST}/"
-          else
-            echo "[debug] No C code / build changes detected, skipping reinstall."
-          fi
+          pip install -r requirements-dev.txt
+          pip install -v -e .

       - name: Run vllm-project/vllm-ascend test
-        working-directory: /vllm-workspace/vllm-ascend
         env:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
           VLLM_WORKER_MULTIPROC_METHOD: spawn
diff --git a/.github/workflows/_schedule_image_build.yaml b/.github/workflows/_schedule_image_build.yaml
index 60bb9a29..cfa6517a 100644
--- a/.github/workflows/_schedule_image_build.yaml
+++ b/.github/workflows/_schedule_image_build.yaml
@@ -76,28 +76,6 @@ jobs:
           driver: docker-container
           use: true

-      - name: Set cache ref
-        id: cache
-        run: |
-          if [ "${{ github.ref_type }}" = "tag" ]; then
-            # For tag events, use the images built from source branch as cache (the tag image doesn't exist yet).
-            if [ -z "$branch" ]; then
-              branch=$(git branch -r --contains HEAD \
-                | grep -v 'HEAD' \
-                | sed 's|[[:space:]]*origin/||' \
-                | head -1)
-            fi
-            branch="${branch:-main}"
-          else
-            # For PR events github.ref_name is "/merge" which has no cached image;
-            # use base_ref (target branch) instead. For push/schedule, base_ref is empty so
-            # fall back to ref_name which is the actual branch name.
-            branch="${{ github.base_ref || github.ref_name }}"
-          fi
-          # Replace / with - for use in image tags
-          branch="${branch//\//-}"
-          echo "ref=quay.io/ascend/vllm-ascend:${branch}-${{ inputs.suffix }}" >> $GITHUB_OUTPUT
-
       - name: Build and push
         uses: docker/build-push-action@v6
         id: build
@@ -111,8 +89,6 @@
           outputs: type=image,name=quay.io/ascend/vllm-ascend,push-by-digest=true,name-canonical=true,push=${{ inputs.should_push }}
           build-args: |
             PIP_INDEX_URL=https://pypi.org/simple
-          # use previously pushed multi-arch image as cache to speed up builds
-          cache-from: type=registry,ref=${{ steps.cache.outputs.ref }}
           provenance: false

       - name: Export digest
@@ -178,7 +154,6 @@
           # which follow the rule from vLLM with prefix v
           # TODO(yikun): the post release might be considered as latest release
           tags: |
-            type=ref,event=branch,suffix=${{ env.SUFFIX }}
            type=pep440,pattern={{raw}},suffix=${{ env.SUFFIX }}
            type=schedule,pattern=main,suffix=${{ env.SUFFIX }}
            type=raw,value=${{ inputs.workflow_dispatch_tag }},enable=${{ github.event_name == 'workflow_dispatch' }},suffix=${{ env.SUFFIX }}
diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml
index 9a40f5e3..925106c9 100644
--- a/.github/workflows/pr_test_full.yaml
+++ b/.github/workflows/pr_test_full.yaml
@@ -81,6 +81,6 @@
     uses: ./.github/workflows/_e2e_test.yaml
     with:
       vllm: ${{ matrix.vllm_version }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
       contains_310: false
       type: full
diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml
index 94393cf0..938430fc 100644
--- a/.github/workflows/pr_test_light.yaml
+++ b/.github/workflows/pr_test_light.yaml
@@ -107,6 +107,6 @@
     uses: ./.github/workflows/_e2e_test.yaml
     with:
       vllm: ${{ matrix.vllm_version }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
       contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }}
       type: light
diff --git a/.github/workflows/schedule_image_build_and_push.yaml b/.github/workflows/schedule_image_build_and_push.yaml
index d751c59a..c0dbf8d1 100644
--- a/.github/workflows/schedule_image_build_and_push.yaml
+++ b/.github/workflows/schedule_image_build_and_push.yaml
@@ -12,15 +12,12 @@ name: Image Build and Push
 on:
   schedule:
-    # UTC+8: 8am, 10am, 12pm, 14pm, 16pm, 18pm, 22pm
-    - cron: '0 0,2,4,6,8,10,14 * * *'
+    # UTC+8: 08:00, 12:00, 16:00, 22:00
+    - cron: '0 0,4,8,14 * * *'
   push:
-    branches:
-      # Build release branch images proactively so cache is warm when the tag is pushed
-      - 'releases/*'
     tags:
       - 'v*'
-  pull_request:
+  pull_request:
     branches:
       - 'main'
     types: [ labeled, synchronize ]
diff --git a/.github/workflows/schedule_nightly_image_build.yaml b/.github/workflows/schedule_nightly_image_build.yaml
index 09e93fb5..aaf56074 100644
--- a/.github/workflows/schedule_nightly_image_build.yaml
+++ b/.github/workflows/schedule_nightly_image_build.yaml
@@ -50,7 +50,6 @@ jobs:
             --build-arg CANN_VERSION="8.5.0" \
             --build-arg UBUNTU_VERSION="22.04" \
             --build-arg PYTHON_VERSION="3.11" \
-            --cache-from "$IMAGE_TAG" \
             -t "$IMAGE_TAG" .
           echo "image-tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
diff --git a/.github/workflows/schedule_test_vllm_main.yaml b/.github/workflows/schedule_test_vllm_main.yaml
index baaa4c94..b60fbb0c 100644
--- a/.github/workflows/schedule_test_vllm_main.yaml
+++ b/.github/workflows/schedule_test_vllm_main.yaml
@@ -34,6 +34,6 @@
     uses: ./.github/workflows/_e2e_test.yaml
     with:
       vllm: main
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
       contains_310: false
       type: full
diff --git a/Dockerfile b/Dockerfile
index a9a58350..b04ae9e1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,49 +19,46 @@ FROM quay.io/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG MOONCAKE_TAG="v0.3.8.post1"
+ARG SOC_VERSION="ascend910b1"
+
+# Define environments
+ENV DEBIAN_FRONTEND=noninteractive
+ENV SOC_VERSION=$SOC_VERSION \
+    TASK_QUEUE_ENABLE=1 \
+    OMP_NUM_THREADS=1

 WORKDIR /workspace

-COPY ./tools/mooncake_installer.sh /vllm-workspace/
+COPY . /vllm-workspace/vllm-ascend/

-# Install clang-15 (for triton-ascend) and Mooncake
+# Install Mooncake dependencies
 RUN apt-get update -y && \
-    apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 clang-15 && \
-    update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
-    update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
+    apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \
     git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
-    mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
     cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
     ARCH=$(uname -m) && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
     mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
     make -j$(nproc) && make install && \
-    rm -rf /vllm-workspace/Mooncake/build && \
+    rm -fr /vllm-workspace/Mooncake/build && \
     rm -rf /var/cache/apt/* && \
     rm -rf /var/lib/apt/lists/*

-# Install modelscope (for fast download) and ray (for multinode)
-RUN pip config set global.index-url ${PIP_INDEX_URL} && \
-    python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
-    python3 -m pip cache purge
+RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.16.0
-RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \
-    VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
+# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly, so we need to uninstall it.
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

 # Install vllm-ascend
-ARG SOC_VERSION="ascend910b1"
-ENV DEBIAN_FRONTEND=noninteractive
-ENV SOC_VERSION=$SOC_VERSION \
-    TASK_QUEUE_ENABLE=1 \
-    OMP_NUM_THREADS=1
-COPY . /vllm-workspace/vllm-ascend/
-
+# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
@@ -69,7 +66,18 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip cache purge

-# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
+# Install clang-15 (for triton-ascend)
+RUN apt-get update -y && \
+    apt-get -y install clang-15 && \
+    update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
+    update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
+    rm -rf /var/cache/apt/* && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install modelscope (for fast download) and ray (for multinode)
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
+    python3 -m pip cache purge
+
 RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc

 CMD ["/bin/bash"]
diff --git a/Dockerfile.310p b/Dockerfile.310p
index 0122ba5b..ec5772a2 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -18,36 +18,37 @@ FROM quay.io/ascend/cann:8.5.0-310p-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG SOC_VERSION="ascend310p1"

-WORKDIR /workspace
+# Define environments
+ENV DEBIAN_FRONTEND=noninteractive
+ENV SOC_VERSION=$SOC_VERSION \
+    TASK_QUEUE_ENABLE=1 \
+    OMP_NUM_THREADS=1
+
 RUN apt-get update -y && \
     apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \
     rm -rf /var/cache/apt/* && \
     rm -rf /var/lib/apt/lists/*

-# Install modelscope (for fast download) and ray (for multinode)
-RUN pip config set global.index-url ${PIP_INDEX_URL} && \
-    python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
-    python3 -m pip cache purge
+WORKDIR /workspace
+
+COPY . /vllm-workspace/vllm-ascend/
+
+RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.16.0
-RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \
-    # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-    VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
+# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly, so we need to uninstall it.
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

 # Install vllm-ascend
-ARG SOC_VERSION="ascend310p1"
-ENV DEBIAN_FRONTEND=noninteractive
-ENV SOC_VERSION=$SOC_VERSION \
-    TASK_QUEUE_ENABLE=1 \
-    OMP_NUM_THREADS=1
-COPY . /vllm-workspace/vllm-ascend/
-
+# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
@@ -55,7 +56,10 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip cache purge

-# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
+# Install modelscope (for fast download) and ray (for multinode)
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
+    python3 -m pip cache purge
+
 RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc

 CMD ["/bin/bash"]
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 174c47dd..92f94199 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -18,34 +18,32 @@ FROM quay.io/ascend/cann:8.5.0-310p-openeuler24.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+ARG SOC_VERSION="ascend310p1"

-WORKDIR /workspace
+ENV SOC_VERSION=$SOC_VERSION \
+    TASK_QUEUE_ENABLE=1 \
+    OMP_NUM_THREADS=1

 RUN yum update -y && \
     yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
     rm -rf /var/cache/yum

-# Install modelscope (for fast download) and ray (for multinode)
-RUN pip config set global.index-url ${PIP_INDEX_URL} && \
-    python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
-    python3 -m pip cache purge
+RUN pip config set global.index-url ${PIP_INDEX_URL}
+
+WORKDIR /workspace
+
+COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.16.0
-RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \
-    # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-    VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
+# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly, so we need to uninstall it.
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

 # Install vllm-ascend
-ARG SOC_VERSION="ascend310p1"
-ENV SOC_VERSION=$SOC_VERSION \
-    TASK_QUEUE_ENABLE=1 \
-    OMP_NUM_THREADS=1
-COPY . /vllm-workspace/vllm-ascend/
-
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
@@ -54,6 +52,10 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip cache purge

+# Install modelscope (for fast download) and ray (for multinode)
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
+    python3 -m pip cache purge
+
 RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc

 CMD ["/bin/bash"]
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index 5b68ae0c..17bd077b 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -19,20 +19,24 @@ FROM quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG MOONCAKE_TAG=v0.3.8.post1
+ARG SOC_VERSION="ascend910_9391"

-COPY ./tools/mooncake_installer.sh /vllm-workspace/
-
+COPY . /vllm-workspace/vllm-ascend/
+# Define environments
 ENV DEBIAN_FRONTEND=noninteractive
+ENV SOC_VERSION=$SOC_VERSION \
+    TASK_QUEUE_ENABLE=1 \
+    OMP_NUM_THREADS=1
+
+RUN pip config set global.index-url ${PIP_INDEX_URL}

 WORKDIR /workspace

-# Install clang-15 (for triton-ascend) and Mooncake
+# Install Mooncake dependencies
 RUN apt-get update -y && \
-    apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 clang-15 && \
-    update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
-    update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
+    apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \
     git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
-    mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
     cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
     ARCH=$(uname -m) && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
@@ -43,28 +47,17 @@ RUN apt-get update -y && \
     rm -rf /var/cache/apt/* && \
     rm -rf /var/lib/apt/lists/*

-# Install modelscope (for fast download) and ray (for multinode)
-RUN pip config set global.index-url ${PIP_INDEX_URL} && \
-    python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
-    python3 -m pip cache purge
-
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.16.0
-RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \
-    # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-    VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
+# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly, so we need to uninstall it.
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

 # Install vllm-ascend
-ARG SOC_VERSION="ascend910_9391"
-ENV DEBIAN_FRONTEND=noninteractive
-ENV SOC_VERSION=$SOC_VERSION \
-    TASK_QUEUE_ENABLE=1 \
-    OMP_NUM_THREADS=1
-COPY . /vllm-workspace/vllm-ascend/
-
+# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
@@ -72,7 +65,18 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip cache purge

-# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
+# Install clang-15 (for triton-ascend)
+RUN apt-get update -y && \
+    apt-get -y install clang-15 && \
+    update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 && \
+    update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 && \
+    rm -rf /var/cache/apt/* && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install modelscope (for fast download) and ray (for multinode)
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
+    python3 -m pip cache purge
+
 RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc

 CMD ["/bin/bash"]
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index 1973f501..4e5b3838 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -19,18 +19,24 @@ FROM quay.io/ascend/cann:8.5.0-a3-openeuler24.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG MOONCAKE_TAG="v0.3.8.post1"
+ARG SOC_VERSION="ascend910_9391"
+
+ENV SOC_VERSION=$SOC_VERSION \
+    TASK_QUEUE_ENABLE=1 \
+    OMP_NUM_THREADS=1
+
+RUN pip config set global.index-url ${PIP_INDEX_URL}

 WORKDIR /workspace

-COPY ./tools/mooncake_installer.sh /vllm-workspace/
+COPY . /vllm-workspace/vllm-ascend/

 SHELL ["/bin/bash", "-c"]

-# Install clang (for triton-ascend) and Mooncake
 RUN yum update -y && \
-    yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc clang && \
+    yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
     git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
-    mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
     ARCH=$(uname -m) && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
@@ -42,27 +48,16 @@ RUN yum update -y && \
     rm -fr /vllm-workspace/Mooncake/build && \
     rm -rf /var/cache/yum/*

-# Install modelscope (for fast download) and ray (for multinode)
-RUN pip config set global.index-url ${PIP_INDEX_URL} && \
-    python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
-    python3 -m pip cache purge
-
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.16.0
-RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \
-    # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-    VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
+# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly, so we need to uninstall it.
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

 # Install vllm-ascend
-ARG SOC_VERSION="ascend910_9391"
-ENV SOC_VERSION=$SOC_VERSION \
-    TASK_QUEUE_ENABLE=1 \
-    OMP_NUM_THREADS=1
-COPY . /vllm-workspace/vllm-ascend/
-
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
@@ -71,6 +66,15 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip cache purge

+# Install clang (for triton-ascend)
+RUN yum update -y && \
+    yum install -y clang && \
+    rm -rf /var/cache/yum/*
+
+# Install modelscope (for fast download) and ray (for multinode)
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
+    python3 -m pip cache purge
+
 RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc

 CMD ["/bin/bash"]
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 03d6c096..825b8a24 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -19,18 +19,24 @@ FROM quay.io/ascend/cann:8.5.0-910b-openeuler24.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG MOONCAKE_TAG="v0.3.8.post1"
+ARG SOC_VERSION="ascend910b1"
+
+ENV SOC_VERSION=$SOC_VERSION \
+    TASK_QUEUE_ENABLE=1 \
+    OMP_NUM_THREADS=1
+
+RUN pip config set global.index-url ${PIP_INDEX_URL}

 WORKDIR /workspace

-COPY ./tools/mooncake_installer.sh /vllm-workspace/
+COPY . /vllm-workspace/vllm-ascend/

 SHELL ["/bin/bash", "-c"]

-# Install clang (for triton-ascend) and Mooncake
 RUN yum update -y && \
-    yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc clang && \
+    yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
     git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
-    mv /vllm-workspace/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
     ARCH=$(uname -m) && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
@@ -42,27 +48,16 @@ RUN yum update -y && \
     rm -fr /vllm-workspace/Mooncake/build && \
     rm -rf /var/cache/yum/*

-# Install modelscope (for fast download) and ray (for multinode)
-RUN pip config set global.index-url ${PIP_INDEX_URL} && \
-    python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
-    python3 -m pip cache purge
-
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.16.0
-RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm && \
-    # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-    VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
+# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly, so we need to uninstall it.
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

 # Install vllm-ascend
-ARG SOC_VERSION="ascend910b1"
-ENV SOC_VERSION=$SOC_VERSION \
-    TASK_QUEUE_ENABLE=1 \
-    OMP_NUM_THREADS=1
-COPY . /vllm-workspace/vllm-ascend/
-
 RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
@@ -71,6 +66,15 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip cache purge

+# Install clang (for triton-ascend)
+RUN yum update -y && \
+    yum install -y clang && \
+    rm -rf /var/cache/yum/*
+
+# Install modelscope (for fast download) and ray (for multinode)
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
+    python3 -m pip cache purge
+
 RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc

 CMD ["/bin/bash"]