[CI] enable custom ops build (#466)

### What this PR does / why we need it?
This PR enables the custom ops build by default.

### Does this PR introduce _any_ user-facing change?

Yes, users who install vllm-ascend from source will now trigger the custom ops
build step.

### How was this patch tested?
Tested via the image build and e2e CI.

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-04-12 10:24:53 +08:00
committed by GitHub
parent d05ea17427
commit 9c7428b3d5
22 changed files with 165 additions and 342 deletions

View File

@@ -46,6 +46,8 @@ jobs:
fetch-depth: 0
- name: "Run actionlint"
env:
SHELLCHECK_OPTS: --exclude=SC2046,SC2006
run: |
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
tools/actionlint.sh -color

View File

@@ -72,9 +72,6 @@ jobs:
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
# TODO(yikun): remove this after https://github.com/docker/setup-qemu-action/issues/198 resolved
with:
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -98,3 +95,7 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
file: Dockerfile.openEuler
# TODO: support and enable custom ops build for openEuler
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
COMPILE_CUSTOM_KERNELS=0

View File

@@ -16,7 +16,7 @@ on:
- 'main'
- '*-dev'
paths:
- '.github/workflows/image.yml'
- '.github/workflows/image_ubuntu.yml'
- 'Dockerfile'
- 'vllm_ascend/**'
push:
@@ -27,13 +27,13 @@ on:
tags:
- 'v*'
paths:
- '.github/workflows/image.yml'
- '.github/workflows/image_ubuntu.yml'
- 'Dockerfile'
- 'vllm_ascend/**'
jobs:
build:
name: vllm-ascend image
name: vllm-ascend Ubuntu image
runs-on: ubuntu-latest
steps:
@@ -72,9 +72,6 @@ jobs:
- name: Build - Set up QEMU
uses: docker/setup-qemu-action@v3
# TODO(yikun): remove this after https://github.com/docker/setup-qemu-action/issues/198 resolved
with:
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Build - Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -98,4 +95,4 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
PIP_INDEX_URL=https://pypi.org/simple

View File

@@ -41,9 +41,14 @@ concurrency:
cancel-in-progress: true
jobs:
test-singlenpu:
name: vLLM Ascend test main(single-npu)
runs-on: linux-arm64-npu-1 # actionlint-ignore: runner-label
test:
strategy:
max-parallel: 2
matrix:
os: [linux-arm64-npu-1, linux-arm64-npu-4]
vllm_verison: [main, v0.8.3]
name: vLLM Ascend test
runs-on: ${{ matrix.os }}
container:
image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
steps:
@@ -72,6 +77,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_verison }}
path: ./vllm-empty
- name: Install vllm-project/vllm from source
@@ -79,11 +85,6 @@ jobs:
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Install pta
run: |
if [ ! -d /root/.cache/pta ]; then
@@ -99,12 +100,23 @@ jobs:
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
- name: Install vllm-project/vllm-ascend
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Run vllm-project/vllm-ascend test on V0 engine
env:
VLLM_USE_V1: 0
HF_ENDPOINT: https://hf-mirror.com
run: |
VLLM_USE_V1=0 pytest -sv -m 'not multinpu' tests
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
pytest -sv tests/singlecard
pytest -sv tests/ops
else
pytest -sv tests/multicard
pytest -sv tests/ops
fi
- name: Run vllm-project/vllm-ascend test for V1 Engine
env:
@@ -112,7 +124,13 @@ jobs:
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_ENDPOINT: https://hf-mirror.com
run: |
pytest -sv -m 'not multinpu' tests
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
pytest -sv tests/singlecard
pytest -sv tests/ops
else
pytest -sv tests/multicard
pytest -sv tests/ops
fi
- name: Run vllm-project/vllm test for V0 Engine
env:
@@ -121,247 +139,3 @@ jobs:
HF_ENDPOINT: https://hf-mirror.com
run: |
pytest -sv
test-multinpu:
name: vLLM Ascend test main(multi-npu)
runs-on: linux-arm64-npu-4
container:
image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
env:
HF_ENDPOINT: https://hf-mirror.com
HF_TOKEN: ${{ secrets.HF_TOKEN }}
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
# sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
- name: Install system dependencies
run: |
apt-get update -y
apt-get -y install git wget
- name: Config git
run: |
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install dependencies
run: |
pip install -r requirements-dev.txt
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Install pta
run: |
if [ ! -d /root/.cache/pta ]; then
mkdir -p /root/.cache/pta
fi
if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
cd /root/.cache/pta
rm -rf pytorch_v2.5.1_py310*
wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
tar -zxvf pytorch_v2.5.1_py310.tar.gz
fi
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
- name: Run vllm-project/vllm-ascend test on V0 engine
env:
VLLM_USE_V1: 0
HF_ENDPOINT: https://hf-mirror.com
run: |
VLLM_USE_V1=0 pytest -sv -m 'multinpu' tests
- name: Run vllm-project/vllm-ascend test for V1 Engine
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_ENDPOINT: https://hf-mirror.com
run: |
pytest -sv -m 'multinpu' tests
test-singlenpu-v0_8_3:
name: vLLM Ascend test v0.8.3(single-npu)
runs-on: linux-arm64-npu-1 # actionlint-ignore: runner-label
container:
image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
apt-get update -y
apt install git -y
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: v0.8.3
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Install pta
run: |
if [ ! -d /root/.cache/pta ]; then
mkdir -p /root/.cache/pta
fi
if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
cd /root/.cache/pta
rm -rf pytorch_v2.5.1_py310*
wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
tar -zxvf pytorch_v2.5.1_py310.tar.gz
fi
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
- name: Run vllm-project/vllm-ascend test on V0 engine
env:
VLLM_USE_V1: 0
HF_ENDPOINT: https://hf-mirror.com
run: |
VLLM_USE_V1=0 pytest -sv -m 'not multinpu' tests
- name: Run vllm-project/vllm-ascend test for V1 Engine
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_ENDPOINT: https://hf-mirror.com
run: |
pytest -sv -m 'not multinpu' tests
- name: Run vllm-project/vllm test for V0 Engine
env:
VLLM_USE_V1: 0
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
HF_ENDPOINT: https://hf-mirror.com
run: |
pytest -sv
test-multinpu-v0_8_3:
name: vLLM Ascend test v0.8.3(multi-npu)
runs-on: linux-arm64-npu-4
needs: test-multinpu
container:
image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
env:
HF_ENDPOINT: https://hf-mirror.com
HF_TOKEN: ${{ secrets.HF_TOKEN }}
steps:
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
# sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
- name: Install system dependencies
run: |
apt-get update -y
apt-get -y install git wget
- name: Config git
run: |
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- name: Install dependencies
run: |
pip install -r requirements-dev.txt
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: v0.8.3
path: ./vllm-empty
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty pip install -e .
- name: Install vllm-project/vllm-ascend
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Install pta
run: |
if [ ! -d /root/.cache/pta ]; then
mkdir -p /root/.cache/pta
fi
if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
cd /root/.cache/pta
rm -rf pytorch_v2.5.1_py310*
wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
tar -zxvf pytorch_v2.5.1_py310.tar.gz
fi
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
- name: Run vllm-project/vllm-ascend test on V0 engine
env:
VLLM_USE_V1: 0
HF_ENDPOINT: https://hf-mirror.com
run: |
VLLM_USE_V1=0 pytest -sv -m 'multinpu' tests
- name: Run vllm-project/vllm-ascend test for V1 Engine
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_ENDPOINT: https://hf-mirror.com
run: |
pytest -sv -m 'multinpu' tests