diff --git a/.github/workflows/image_310p_openeuler.yml b/.github/workflows/image_310p_openeuler.yml index 2626a77..915ea79 100644 --- a/.github/workflows/image_310p_openeuler.yml +++ b/.github/workflows/image_310p_openeuler.yml @@ -6,10 +6,10 @@ name: 'image / openEuler / 310p' # - push: ${{ github.event_name != 'pull_request' }} ==> false # 2. branches push trigger image publish # - is for branch/dev/nightly image -# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev +# - commits are merge into main/*-dev ==> vllm-ascend:main-310p-openeuler / vllm-ascend:*-dev-310p-openeuler # 3. tags push trigger image publish # - is for final release image -# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler|latest / vllm-ascend:v1.2.3rc1-openeuler +# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p-openeuler / vllm-ascend:v1.2.3rc1-310p-openeuler on: pull_request: branches: @@ -33,7 +33,7 @@ on: tags: - 'v*' paths: - - '.github/workflows/image_310p.openeuler.yml' + - '.github/workflows/image_310p_openeuler.yml' - 'Dockerfile.310p.openEuler' - 'vllm_ascend/**' @@ -63,16 +63,18 @@ jobs: # Note for test case # https://github.com/marketplace/actions/docker-metadata-action#typeref # 1. branch job pulish per main/*-dev branch commits - # 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine + # 2. main and dev pull_request is build only, so the tag pr-N-310p-openeuler is fine # 3. only pep440 matched tag will be published: - # - v0.7.1 --> v0.7.1-openeuler, latest - # - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest + # - v0.7.1 --> v0.7.1-310p-openeuler + # - pre/post/dev: v0.7.1rc1-310p-openeuler/v0.7.1rc1-310p-openeuler/v0.7.1rc1.dev1-310p-openeuler/v0.7.1.post1-310p-openeuler, no latest # which follow the rule from vLLM with prefix v # TODO(yikun): the post release might be considered as latest release tags: | type=ref,event=branch,suffix=-310p-openeuler - type=ref,event=pr,suffix=-openeuler + type=ref,event=pr,suffix=-310p-openeuler type=pep440,pattern={{raw}},suffix=-310p-openeuler + flavor: + latest=false - name: Free up disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 @@ -112,3 +114,4 @@ jobs: file: Dockerfile.310p.openEuler build-args: | PIP_INDEX_URL=https://pypi.org/simple + provenance: false diff --git a/.github/workflows/image_310p_ubuntu.yml b/.github/workflows/image_310p_ubuntu.yml index 638c0e3..fc040b3 100644 --- a/.github/workflows/image_310p_ubuntu.yml +++ b/.github/workflows/image_310p_ubuntu.yml @@ -6,10 +6,10 @@ name: 'image / Ubuntu / 310p' # - push: ${{ github.event_name != 'pull_request' }} ==> false # 2. branches push trigger image publish # - is for branch/dev/nightly image -# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev +# - commits are merge into main/*-dev ==> vllm-ascend:main-310p / vllm-ascend:*-dev-310p # 3. tags push trigger image publish # - is for final release image -# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3|latest / vllm-ascend:v1.2.3rc1 +# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p / vllm-ascend:v1.2.3rc1-310p on: pull_request: branches: @@ -61,14 +61,16 @@ jobs: # 1. branch job pulish per main/*-dev branch commits # 2. main and dev pull_request is build only, so the tag pr-N is fine # 3. only pep440 matched tag will be published: - # - v0.7.1 --> v0.7.1, latest - # - pre/post/dev: v0.7.1rc1/v0.7.1rc1/v0.7.1rc1.dev1/v0.7.1.post1, no latest + # - v0.7.1 --> v0.7.1-310p + # - pre/post/dev: v0.7.1rc1-310p/v0.7.1rc1-310p/v0.7.1rc1.dev1-310p/v0.7.1.post1-310p, no latest # which follow the rule from vLLM with prefix v # TODO(yikun): the post release might be considered as latest release tags: | type=ref,event=branch,suffix=-310p type=ref,event=pr,suffix=-310p type=pep440,pattern={{raw}},suffix=-310p + flavor: + latest=false - name: Free up disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 @@ -108,3 +110,4 @@ jobs: tags: ${{ steps.meta.outputs.tags }} build-args: | PIP_INDEX_URL=https://pypi.org/simple + provenance: false \ No newline at end of file diff --git a/.github/workflows/image_a3_openeuler.yml b/.github/workflows/image_a3_openeuler.yml new file mode 100644 index 0000000..38b46d0 --- /dev/null +++ b/.github/workflows/image_a3_openeuler.yml @@ -0,0 +1,117 @@ +name: 'image / openEuler / a3' +# This is a docker build check and publish job: +# 1. PR Triggered docker image build check +# - is for image build check +# - Enable on main/*-dev branch +# - push: ${{ github.event_name != 'pull_request' }} ==> false +# 2. branches push trigger image publish +# - is for branch/dev/nightly image +# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev +# 3. tags push trigger image publish +# - is for final release image +# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-a3-openeuler / vllm-ascend:v1.2.3rc1-a3-openeuler +on: + pull_request: + branches: + - 'main' + - '*-dev' + paths: + - '.github/workflows/image_a3_openeuler.yml' + - 'Dockerfile.a3.openEuler' + - 'vllm_ascend/**' + - 'setup.py' + - 'pyproject.toml' + - 'requirements.txt' + - 'cmake/**' + - 'CMakeLists.txt' + - 'csrc/**' + push: + # Publish image when tagging, the Dockerfile in tag will be build as tag image + branches: + - 'main' + - '*-dev' + tags: + - 'v*' + paths: + - '.github/workflows/image_a3_openeuler.yml' + - 'Dockerfile.a3.openEuler' + - 'vllm_ascend/**' + +jobs: + build: + name: vllm-ascend image build + runs-on: >- + ${{ + github.event_name == 'push' && github.repository_owner == 'vllm-project' && + 'ubuntu-latest' || + 'ubuntu-24.04-arm' + }} + steps: + - uses: actions/checkout@v4 + + - name: Print + run: | + lscpu + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + # TODO(yikun): add more hub image and a note on release policy for container image + images: | + quay.io/ascend/vllm-ascend + # Note for test case + # https://github.com/marketplace/actions/docker-metadata-action#typeref + # 1. branch job pulish per main/*-dev branch commits + # 2. main and dev pull_request is build only, so the tag pr-N-a3-openeuler is fine + # 3. only pep440 matched tag will be published: + # - v0.7.1 --> v0.7.1-a3-openeuler + # - pre/post/dev: v0.7.1rc1-a3-openeuler/v0.7.1rc1-a3-openeuler/v0.7.1rc1.dev1-a3-openeuler/v0.7.1.post1-a3-openeuler, no latest + # which follow the rule from vLLM with prefix v + # TODO(yikun): the post release might be considered as latest release + tags: | + type=ref,event=branch,suffix=-a3-openeuler + type=ref,event=pr,suffix=-a3-openeuler + type=pep440,pattern={{raw}},suffix=-a3-openeuler + flavor: + latest=false + + - name: Free up disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + with: + tool-cache: true + docker-images: false + + - name: Build - Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Build - Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Publish - Login to Quay Container Registry + if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }} + uses: docker/login-action@v3 + with: + registry: quay.io + username: ${{ vars.QUAY_USERNAME }} + password: ${{ secrets.QUAY_PASSWORD }} + + - name: Build and push a3 + uses: docker/build-push-action@v6 + with: + platforms: >- + ${{ + github.event_name == 'push' && github.repository_owner == 'vllm-project' && + 'linux/amd64,linux/arm64' || + 'linux/arm64' + }} + # use the current repo path as the build context, ensure .git is contained + context: . + # only trigger when tag, branch/main push + push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }} + labels: ${{ steps.meta.outputs.labels }} + tags: ${{ steps.meta.outputs.tags }} + file: Dockerfile.a3.openEuler + build-args: | + PIP_INDEX_URL=https://pypi.org/simple + provenance: false + diff --git a/.github/workflows/image_a3_ubuntu.yml b/.github/workflows/image_a3_ubuntu.yml new file mode 100644 index 0000000..f031bd2 --- /dev/null +++ b/.github/workflows/image_a3_ubuntu.yml @@ -0,0 +1,113 @@ +name: 'image / Ubuntu / a3' +# This is a docker build check and publish job: +# 1. PR Triggered docker image build check +# - is for image build check +# - Enable on main/*-dev branch +# - push: ${{ github.event_name != 'pull_request' }} ==> false +# 2. branches push trigger image publish +# - is for branch/dev/nightly image +# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev +# 3. tags push trigger image publish +# - is for final release image +# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-a3|vllm-ascend:v1.2.3rc1-a3 +on: + pull_request: + branches: + - 'main' + - '*-dev' + paths: + - '.github/workflows/image_a3_ubuntu.yml' + - 'Dockerfile.a3' + - 'vllm_ascend/**' + - 'setup.py' + - 'pyproject.toml' + - 'requirements.txt' + - 'cmake/**' + - 'CMakeLists.txt' + - 'csrc/**' + push: + # Publish image when tagging, the Dockerfile in tag will be build as tag image + branches: + - 'main' + - '*-dev' + tags: + - 'v*' + paths: + - '.github/workflows/image_a3_ubuntu.yml' + - 'Dockerfile.a3' + - 'vllm_ascend/**' +jobs: + + build: + name: vllm-ascend image build + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Print + run: | + lscpu + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + # TODO(yikun): add more hub image and a note on release policy for container image + images: | + quay.io/ascend/vllm-ascend + # Note for test case + # https://github.com/marketplace/actions/docker-metadata-action#typeref + # 1. branch job pulish per main/*-dev branch commits + # 2. main and dev pull_request is build only, so the tag pr-N-a3 is fine + # 3. only pep440 matched tag will be published: + # - v0.7.1 --> v0.7.1-a3 + # - pre/post/dev: v0.7.1rc1-a3/v0.7.1rc1-a3/v0.7.1rc1.dev1-a3/v0.7.1.post1-a3, no latest + # which follow the rule from vLLM with prefix v + # TODO(yikun): the post release might be considered as latest release + tags: | + type=ref,event=branch,suffix=-a3 + type=ref,event=pr,suffix=-a3 + type=pep440,pattern={{raw}},suffix=-a3 + flavor: + latest=false + + - name: Free up disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + with: + tool-cache: true + docker-images: false + + - name: Build - Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Build - Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Publish - Login to Quay Container Registry + if: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }} + uses: docker/login-action@v3 + with: + registry: quay.io + username: ${{ vars.QUAY_USERNAME }} + password: ${{ secrets.QUAY_PASSWORD }} + + - name: Build and push a3 + uses: docker/build-push-action@v6 + with: + platforms: >- + ${{ + github.event_name == 'push' && github.repository_owner == 'vllm-project' && + 'linux/amd64,linux/arm64' || + 'linux/amd64' + }} + # use the current repo path as the build context, ensure .git is contained + context: . + file: Dockerfile.a3 + # only trigger when tag, branch/main push + push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }} + labels: ${{ steps.meta.outputs.labels }} + tags: ${{ steps.meta.outputs.tags }} + build-args: | + PIP_INDEX_URL=https://pypi.org/simple + provenance: false + diff --git a/.github/workflows/image_openeuler.yml b/.github/workflows/image_openeuler.yml index c954e56..c065d09 100644 --- a/.github/workflows/image_openeuler.yml +++ b/.github/workflows/image_openeuler.yml @@ -6,10 +6,9 @@ name: 'image / openEuler' # - push: ${{ github.event_name != 'pull_request' }} ==> false # 2. branches push trigger image publish # - is for branch/dev/nightly image -# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev -# 3. tags push trigger image publish +# - commits are merge into main/*-dev ==> vllm-ascend:main-openeuler / vllm-ascend:*-dev-openeuler # - is for final release image -# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler|latest / vllm-ascend:v1.2.3rc1-openeuler +# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler / vllm-ascend:v1.2.3rc1-openeuler on: pull_request: branches: @@ -65,7 +64,7 @@ jobs: # 1. branch job pulish per main/*-dev branch commits # 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine # 3. only pep440 matched tag will be published: - # - v0.7.1 --> v0.7.1-openeuler, latest + # - v0.7.1 --> v0.7.1-openeuler # - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest # which follow the rule from vLLM with prefix v # TODO(yikun): the post release might be considered as latest release @@ -73,6 +72,8 @@ jobs: type=ref,event=branch,suffix=-openeuler type=ref,event=pr,suffix=-openeuler type=pep440,pattern={{raw}},suffix=-openeuler + flavor: + latest=true - name: Free up disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 @@ -112,3 +113,4 @@ jobs: file: Dockerfile.openEuler build-args: | PIP_INDEX_URL=https://pypi.org/simple + provenance: false diff --git a/.github/workflows/image_ubuntu.yml b/.github/workflows/image_ubuntu.yml index 69fe385..a7818e3 100644 --- a/.github/workflows/image_ubuntu.yml +++ b/.github/workflows/image_ubuntu.yml @@ -9,7 +9,7 @@ name: 'image / Ubuntu' # - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev # 3. tags push trigger image publish # - is for final release image -# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3|latest / vllm-ascend:v1.2.3rc1 +# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1 on: pull_request: branches: @@ -69,6 +69,8 @@ jobs: type=ref,event=branch type=ref,event=pr type=pep440,pattern={{raw}} + flavor: + latest=true - name: Free up disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 @@ -108,3 +110,4 @@ jobs: tags: ${{ steps.meta.outputs.tags }} build-args: | PIP_INDEX_URL=https://pypi.org/simple + provenance: false diff --git a/Dockerfile.a3 b/Dockerfile.a3 new file mode 100644 index 0000000..8deae57 --- /dev/null +++ b/Dockerfile.a3 @@ -0,0 +1,60 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +FROM quay.io/ascend/cann:8.1.rc1-a3-ubuntu22.04-py3.10 + +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" +ARG COMPILE_CUSTOM_KERNELS=1 + +# Define environments +ENV DEBIAN_FRONTEND=noninteractive +ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} + +RUN apt-get update -y && \ + apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \ + rm -rf /var/cache/apt/* && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +COPY . /vllm-workspace/vllm-ascend/ + +RUN pip config set global.index-url ${PIP_INDEX_URL} + +# Install vLLM +ARG VLLM_REPO=https://github.com/vllm-project/vllm.git +ARG VLLM_TAG=v0.9.2 +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm +# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. +RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ + python3 -m pip uninstall -y triton && \ + python3 -m pip cache purge + +# Install vllm-ascend +# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH +RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ + source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ + source /usr/local/Ascend/nnal/atb/set_env.sh && \ + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ + python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ + python3 -m pip cache purge + +# Install modelscope (for fast download) and ray (for multinode) +RUN python3 -m pip install modelscope ray && \ + python3 -m pip cache purge + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler new file mode 100644 index 0000000..f0a1001 --- /dev/null +++ b/Dockerfile.a3.openEuler @@ -0,0 +1,57 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +FROM quay.io/ascend/cann:8.1.rc1-a3-openeuler22.03-py3.10 + +ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" +ARG COMPILE_CUSTOM_KERNELS=1 + +ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} + +RUN yum update -y && \ + yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \ + rm -rf /var/cache/yum + +RUN pip config set global.index-url ${PIP_INDEX_URL} + +WORKDIR /workspace + +COPY . /vllm-workspace/vllm-ascend/ + +# Install vLLM +ARG VLLM_REPO=https://github.com/vllm-project/vllm.git +ARG VLLM_TAG=v0.9.2 + +RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm +# In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. +RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ + python3 -m pip uninstall -y triton && \ + python3 -m pip cache purge + +# Install vllm-ascend +RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \ + source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ + source /usr/local/Ascend/nnal/atb/set_env.sh && \ + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ + python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ + python3 -m pip cache purge + +# Install modelscope (for fast download) and ray (for multinode) +RUN python3 -m pip install modelscope ray && \ + python3 -m pip cache purge + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 5ea6aa9..6d2fc8f 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -51,8 +51,8 @@ env_variables: Dict[str, Callable[[], Any]] = { "C_COMPILER": lambda: os.getenv("C_COMPILER", None), # The version of the Ascend chip. If not set, the default value is - # ASCEND910B1. It's used for package building. Please make sure that the - # version is correct. + # ASCEND910B1(Available for A2 and A3 series). It's used for package building. + # Please make sure that the version is correct. "SOC_VERSION": lambda: os.getenv("SOC_VERSION", "ASCEND910B1"), # If set, vllm-ascend will print verbose logs during compilation