### What this PR does / why we need it? This pull request significantly enhances the test suite by adding new end-to-end test cases for Qwen3 models on the 310P hardware platform. The primary goal is to ensure the stability and correctness of these models under diverse operational conditions, including various parallelism strategies, data types, and quantization methods. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? E2E test - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0 --------- Signed-off-by: pu-zhe <zpuaa@outlook.com>
467 lines
16 KiB
YAML
467 lines
16 KiB
YAML
# Reusable end-to-end test workflow for vllm-ascend, invoked via workflow_call
# from the repository's trigger workflows.
name: 'e2e test'

on:
  workflow_call:
    inputs:
      # Git ref (branch, tag, or SHA) of vllm-project/vllm to build against.
      vllm:
        required: true
        type: string
      # Container image used by the A-series (non-310P) single-card jobs.
      image:
        required: true
        type: string
      # Test depth selector: 'light' or 'full'.
      type:
        required: true
        type: string
      # Whether the 310P hardware jobs should run.
      contains_310:
        required: true
        type: boolean
jobs:
  # Light single-card suite; runs only when the caller requests type == 'light'.
  e2e-light:
    name: singlecard-light
    if: ${{ inputs.type == 'light' }}
    runs-on: linux-aarch64-a2b3-1
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ inputs.image }}
    env:
      VLLM_LOGGING_LEVEL: ERROR
      # Quoted so the env value is the literal string "True", not a YAML boolean.
      VLLM_USE_MODELSCOPE: "True"
      HF_HUB_OFFLINE: 1
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          # apt-get (not apt): apt's CLI is not guaranteed stable in scripts.
          apt-get install -y git

      - name: Install system dependencies
        run: |
          # $(...) command substitution instead of legacy backticks.
          apt-get -y install $(cat packages.txt)
          apt-get -y install gcc g++ cmake libnuma-dev clang-15

          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
e2e-full:
|
|
name: singlecard-full
|
|
if: ${{ inputs.type == 'full' }}
|
|
runs-on: linux-aarch64-a2b3-1
|
|
strategy:
|
|
fail-fast: false
|
|
matrix:
|
|
part: [0, 1]
|
|
container:
|
|
image: ${{ inputs.image }}
|
|
env:
|
|
VLLM_LOGGING_LEVEL: ERROR
|
|
VLLM_USE_MODELSCOPE: True
|
|
HF_HUB_OFFLINE: 1
|
|
steps:
|
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
uses: actions/checkout@v6
|
|
|
|
- name: Check npu and CANN info
|
|
run: |
|
|
npu-smi info
|
|
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
|
|
- name: Config mirrors
|
|
run: |
|
|
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
apt-get update -y
|
|
apt install git -y
|
|
|
|
- name: Install system dependencies
|
|
run: |
|
|
apt-get -y install `cat packages.txt`
|
|
apt-get -y install gcc g++ cmake libnuma-dev clang-15
|
|
|
|
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
|
|
- name: Checkout vllm-project/vllm repo
|
|
uses: actions/checkout@v6
|
|
with:
|
|
repository: vllm-project/vllm
|
|
ref: ${{ inputs.vllm }}
|
|
path: ./vllm-empty
|
|
fetch-depth: 1
|
|
|
|
- name: Install vllm-project/vllm from source
|
|
working-directory: ./vllm-empty
|
|
run: |
|
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
|
|
- name: Install vllm-project/vllm-ascend
|
|
env:
|
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
run: |
|
|
pip install -r requirements-dev.txt
|
|
pip install -v -e .
|
|
- name: Run e2e test
|
|
env:
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
run: |
|
|
python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
|
|
|
|
e2e-2-cards-light:
|
|
name: multicard-2-light
|
|
if: ${{ inputs.type == 'light' }}
|
|
runs-on: linux-aarch64-a3-2
|
|
strategy:
|
|
fail-fast: false
|
|
matrix:
|
|
part: [0]
|
|
container:
|
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
|
|
env:
|
|
VLLM_LOGGING_LEVEL: ERROR
|
|
VLLM_USE_MODELSCOPE: True
|
|
HCCL_BUFFSIZE: 1024
|
|
HF_HUB_OFFLINE: 1
|
|
steps:
|
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
uses: actions/checkout@v6
|
|
- name: Check npu and CANN info
|
|
run: |
|
|
npu-smi info
|
|
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
|
|
- name: Config mirrors
|
|
run: |
|
|
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
apt-get update -y
|
|
apt install git -y
|
|
|
|
- name: Install system dependencies
|
|
run: |
|
|
apt-get -y install `cat packages.txt`
|
|
apt-get -y install gcc g++ cmake libnuma-dev clang-15
|
|
|
|
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
|
|
- name: Checkout vllm-project/vllm repo
|
|
uses: actions/checkout@v6
|
|
with:
|
|
repository: vllm-project/vllm
|
|
ref: ${{ inputs.vllm }}
|
|
path: ./vllm-empty
|
|
fetch-depth: 1
|
|
|
|
- name: Install vllm-project/vllm from source
|
|
working-directory: ./vllm-empty
|
|
run: |
|
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
|
|
- name: Install vllm-project/vllm-ascend
|
|
env:
|
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
run: |
|
|
pip install -r requirements-dev.txt
|
|
pip install -v -e .
|
|
- name: Run vllm-project/vllm-ascend test (light)
|
|
env:
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
run: |
|
|
python3 .github/workflows/scripts/run_suite.py --suite e2e-2card-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
|
|
|
e2e-2-cards-full:
|
|
name: multicard-2-full
|
|
if: ${{ inputs.type == 'full' }}
|
|
runs-on: linux-aarch64-a3-2
|
|
strategy:
|
|
fail-fast: false
|
|
matrix:
|
|
part: [0]
|
|
container:
|
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
|
|
env:
|
|
VLLM_LOGGING_LEVEL: ERROR
|
|
VLLM_USE_MODELSCOPE: True
|
|
HCCL_BUFFSIZE: 1024
|
|
HF_HUB_OFFLINE: 1
|
|
steps:
|
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
uses: actions/checkout@v6
|
|
- name: Check npu and CANN info
|
|
run: |
|
|
npu-smi info
|
|
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
|
|
- name: Config mirrors
|
|
run: |
|
|
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
apt-get update -y
|
|
apt install git -y
|
|
|
|
- name: Install system dependencies
|
|
run: |
|
|
apt-get -y install `cat packages.txt`
|
|
apt-get -y install gcc g++ cmake libnuma-dev clang-15
|
|
|
|
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
|
|
- name: Checkout vllm-project/vllm repo
|
|
uses: actions/checkout@v6
|
|
with:
|
|
repository: vllm-project/vllm
|
|
ref: ${{ inputs.vllm }}
|
|
path: ./vllm-empty
|
|
fetch-depth: 1
|
|
|
|
- name: Install vllm-project/vllm from source
|
|
working-directory: ./vllm-empty
|
|
run: |
|
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
|
|
- name: Install vllm-project/vllm-ascend
|
|
env:
|
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
run: |
|
|
pip install -r requirements-dev.txt
|
|
pip install -v -e .
|
|
- name: Run vllm-project/vllm-ascend test (full)
|
|
env:
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
run: |
|
|
python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-2-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
|
|
|
- name: Run vllm-project/vllm-ascend test (non triton)
|
|
if: ${{ inputs.type == 'full' && matrix.part == 0 }}
|
|
env:
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
run: |
|
|
python3 -m pip uninstall -y triton-ascend
|
|
pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py
|
|
|
|
e2e-4-cards-full:
|
|
name: multicard-4-full
|
|
if: ${{ inputs.type == 'full' }}
|
|
runs-on: linux-aarch64-a3-4
|
|
strategy:
|
|
fail-fast: false
|
|
matrix:
|
|
part: [0]
|
|
container:
|
|
image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11
|
|
env:
|
|
VLLM_LOGGING_LEVEL: ERROR
|
|
VLLM_USE_MODELSCOPE: True
|
|
HF_HUB_OFFLINE: 1
|
|
steps:
|
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
uses: actions/checkout@v6
|
|
- name: Check npu and CANN info
|
|
run: |
|
|
npu-smi info
|
|
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
|
|
- name: Config mirrors
|
|
run: |
|
|
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
apt-get update -y
|
|
apt install git -y
|
|
|
|
- name: Install system dependencies
|
|
run: |
|
|
apt-get -y install `cat packages.txt`
|
|
apt-get -y install gcc g++ cmake libnuma-dev clang-15
|
|
|
|
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
|
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
|
|
- name: Checkout vllm-project/vllm repo
|
|
uses: actions/checkout@v6
|
|
with:
|
|
repository: vllm-project/vllm
|
|
ref: ${{ inputs.vllm }}
|
|
path: ./vllm-empty
|
|
fetch-depth: 1
|
|
|
|
- name: Install vllm-project/vllm from source
|
|
working-directory: ./vllm-empty
|
|
run: |
|
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
|
|
- name: Install vllm-project/vllm-ascend
|
|
env:
|
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
run: |
|
|
pip install -r requirements-dev.txt
|
|
pip install -v -e .
|
|
|
|
- name: Run vllm-project/vllm-ascend test for V1 Engine
|
|
env:
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
run: |
|
|
python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-4-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
|
|
|
e2e_310p:
|
|
name: 310p singlecard
|
|
runs-on: linux-aarch64-310p-1
|
|
if: ${{ inputs.contains_310 }}
|
|
container:
|
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11
|
|
env:
|
|
VLLM_LOGGING_LEVEL: ERROR
|
|
VLLM_USE_MODELSCOPE: True
|
|
HF_HUB_OFFLINE: 1
|
|
steps:
|
|
- name: Check npu and CANN info
|
|
run: |
|
|
npu-smi info
|
|
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
- name: Config mirrors
|
|
run: |
|
|
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
apt-get update -y
|
|
apt install git -y
|
|
|
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
uses: actions/checkout@v6
|
|
|
|
- name: Install system dependencies
|
|
run: |
|
|
apt-get -y install `cat packages.txt`
|
|
apt-get -y install gcc g++ cmake libnuma-dev
|
|
|
|
- name: Checkout vllm-project/vllm repo
|
|
uses: actions/checkout@v6
|
|
with:
|
|
repository: vllm-project/vllm
|
|
ref: ${{ inputs.vllm }}
|
|
path: ./vllm-empty
|
|
fetch-depth: 1
|
|
|
|
- name: Install vllm-project/vllm from source
|
|
working-directory: ./vllm-empty
|
|
run: |
|
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
|
|
- name: Install vllm-project/vllm-ascend
|
|
env:
|
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
run: |
|
|
pip install -r requirements-dev.txt
|
|
pip install -v -e .
|
|
|
|
- name: Run vllm-project/vllm-ascend test
|
|
env:
|
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
run: |
|
|
pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py
|
|
|
|
e2e_310p-4cards:
|
|
name: 310p multicards 4cards
|
|
runs-on: linux-aarch64-310p-4
|
|
if: ${{ inputs.contains_310 }}
|
|
container:
|
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11
|
|
env:
|
|
VLLM_LOGGING_LEVEL: ERROR
|
|
VLLM_USE_MODELSCOPE: True
|
|
HF_HUB_OFFLINE: 1
|
|
steps:
|
|
- name: Check npu and CANN info
|
|
run: |
|
|
npu-smi info
|
|
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
- name: Config mirrors
|
|
run: |
|
|
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
apt-get update -y
|
|
apt install git -y
|
|
|
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
uses: actions/checkout@v6
|
|
|
|
- name: Install system dependencies
|
|
run: |
|
|
apt-get -y install `cat packages.txt`
|
|
apt-get -y install gcc g++ cmake libnuma-dev
|
|
|
|
- name: Checkout vllm-project/vllm repo
|
|
uses: actions/checkout@v6
|
|
with:
|
|
repository: vllm-project/vllm
|
|
ref: ${{ inputs.vllm }}
|
|
path: ./vllm-empty
|
|
fetch-depth: 1
|
|
|
|
- name: Install vllm-project/vllm from source
|
|
working-directory: ./vllm-empty
|
|
run: |
|
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
|
|
- name: Install vllm-project/vllm-ascend
|
|
env:
|
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
run: |
|
|
pip install -r requirements-dev.txt
|
|
pip install -v -e .
|
|
|
|
- name: Run vllm-project/vllm-ascend test
|
|
env:
|
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
run: |
|
|
pytest -sv --durations=0 \
|
|
tests/e2e/310p/multicard/test_dense_model_multicard.py \
|
|
tests/e2e/310p/multicard/test_moe_model_multicard.py
|