[CI][Doc] Optimize multi-node CI (#3565)
### What this PR does / why we need it?
This pull request mainly does the following:
1. Add a doc for multi-node CI; its main content is the underlying
mechanism and how to contribute
2. Simplify the config YAML to make it more developer-friendly
3. Optimized the mooncake installation script to prevent accidental
failures during installation
4. Fix the workflow to ensure the Kubernetes resources can be applied correctly
5. Add Qwen3-235B-W8A8 disaggregated_prefill test
6. Add GLM-4.5 multi dp test
7. Add 2p1d 4nodes disaggregated_prefill test
8. Refactor nightly tests
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main:
17c540a993
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
190
.github/workflows/_e2e_nightly_multi_node.yaml
vendored
Normal file
190
.github/workflows/_e2e_nightly_multi_node.yaml
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
name: 'e2e nightly test multi_node'

# Reusable workflow: it is only started by other workflows via `workflow_call`.
on:
  workflow_call:
    inputs:
      soc_version:
        description: use a2 or a3
        type: string
        required: true
      image:
        description: base image for pods
        type: string
        required: false
        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
      config_file_path:
        description: the model config for multi_node test
        type: string
        required: true
      replicas:
        description: replicas of the k8s cluster
        type: string
        required: false
        default: "1"
      size:
        description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
        type: string
        required: false
        default: "2"
      vllm_version:
        description: vllm version to use
        type: string
        required: false
        default: "v0.11.0"
      vllm_ascend_remote_url:
        description: used for pr level tests
        type: string
        required: false
        default: https://github.com/vllm-project/vllm-ascend.git
      vllm_ascend_ref:
        description: used for pr level tests
        type: string
        required: false
        default: main

# Every `run` step uses a login shell ("bash -el {0}"): plain bash shells do
# not read ~/.profile or ~/.bashrc, and the login shell is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Runs that share the same workflow name and git ref form one concurrency
# group; a newer run cancels the one that is still in progress.
# NOTE(review): inside a called workflow `github.workflow` presumably resolves
# to the caller's workflow name - confirm sibling invocations are meant to
# share this group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
|
||||
|
||||
jobs:
  # Renders the LeaderWorkerSet manifest from a jinja2 template, applies it with
  # kubectl, streams the leader pod logs and derives the job result from the
  # exit code written to $RESULT_FILE.
  e2e:
    # This is a runner with no NPU for k8s controller
    runs-on: linux-aarch64-a3-0
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      # NOTE(review): indentation was lost in the reviewed source; `env` is
      # placed under `container` here - confirm against the real file.
      env:
        KUBECONFIG: /tmp/kubeconfig
        KUBECTL: /root/.cache/.kube/kubectl
        NAMESPACE: vllm-project
        LEADER_POD: vllm-0
        RESULT_FILE: /root/.cache/tests/ret/test_result.txt
    steps:
      - name: Install system dependencies
        run: |
          # configure apt and pip source
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
          pip install jinja2-cli

          apt-get update -y && apt-get install -y git curl

      - name: Install kubectl
        run: |
          # Install kubectl from the binary found at $KUBECTL
          install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl

          # Verify kubectl installation
          kubectl version --client=true

      # TODO: Add A2 tests
      - name: Setup kubeconfig for A3
        if: inputs.soc_version == 'a3'
        env:
          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
        run: |
          # A called workflow only sees secrets handed over by its caller; stop
          # early with a clear message instead of writing an empty kubeconfig.
          if [ -z "$KUBECONFIG_B64" ]; then
            echo "Error: secret KUBECONFIG_B64 is empty; the caller must pass it (e.g. 'secrets: inherit')"
            exit 1
          fi

          # Decode and save kubeconfig, readable by the current user only
          (umask 077 && echo "$KUBECONFIG_B64" | base64 -d > "$KUBECONFIG")

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Prepare scripts
        run: |
          # prepare for lws entrypoint scripts
          install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh

      - name: Clear result ret
        run: |
          # drop a result left over from an earlier run
          rm -f $RESULT_FILE

      - name: Launch cluster
        # Inputs are handed to the script as environment variables instead of
        # being interpolated into the script text, so their content is never
        # parsed as shell code.
        env:
          LWS_SIZE: ${{ inputs.size }}
          LWS_REPLICAS: ${{ inputs.replicas }}
          LWS_IMAGE: ${{ inputs.image }}
          LWS_CONFIG_FILE_PATH: ${{ inputs.config_file_path }}
          LWS_VLLM_VERSION: ${{ inputs.vllm_version }}
          LWS_VLLM_ASCEND_REF: ${{ inputs.vllm_ascend_ref }}
          LWS_VLLM_ASCEND_REMOTE_URL: ${{ inputs.vllm_ascend_remote_url }}
        run: |
          set -e

          size="$LWS_SIZE"
          replicas="$LWS_REPLICAS"
          image="$LWS_IMAGE"
          config_file_path="$LWS_CONFIG_FILE_PATH"
          vllm_version="$LWS_VLLM_VERSION"
          vllm_ascend_ref="$LWS_VLLM_ASCEND_REF"
          vllm_ascend_remote_url="$LWS_VLLM_ASCEND_REMOTE_URL"
          result_file_path="$RESULT_FILE"

          # refuse to render the manifest when a mandatory value is empty
          required_params=("size" "replicas" "image" "config_file_path")
          for param in "${required_params[@]}"; do
            if [ -z "${!param}" ]; then
              echo "Error: Parameter '$param' is required but empty"
              exit 1
            fi
          done

          jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
            -D size="$size" \
            -D replicas="$replicas" \
            -D image="$image" \
            -D config_file_path="$config_file_path" \
            -D vllm_version="$vllm_version" \
            -D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
            -D vllm_ascend_ref="$vllm_ascend_ref" \
            -D result_file_path="$result_file_path" \
            --outfile lws.yaml

          kubectl apply -f ./lws.yaml

      - name: Waiting for pod ready
        env:
          # Upper bound in seconds for the leader pod to become Ready.
          POD_READY_TIMEOUT: "3600"
        run: |
          echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."

          ELAPSED=0
          while true; do
            # get pod status; the pod may not exist yet right after the apply,
            # so a failing lookup counts as "not ready" instead of aborting
            # the step (the shell runs with -e)
            READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null || true)

            # ready once every reported container is ready ("true", "true true", ...)
            if [[ -n "$READY_STATUS" && "$READY_STATUS" != *false* ]]; then
              echo "Pod [$LEADER_POD] is Ready!"
              break
            fi

            if [ "$ELAPSED" -ge "$POD_READY_TIMEOUT" ]; then
              echo "Timeout waiting for Pod [$LEADER_POD] to become Ready"
              kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
              exit 1
            fi

            echo "Pod [$LEADER_POD] not ready, waiting..."
            sleep 3
            ELAPSED=$((ELAPSED + 3))
          done

      - name: Stream logs
        run: |
          kubectl logs -f "$LEADER_POD" -n "$NAMESPACE"

      - name: Determine is success
        run: |
          # wait (at most TIMEOUT seconds) for the result file to show up
          TIMEOUT=600
          ELAPSED=0
          while [ ! -f "$RESULT_FILE" ]; do
            sleep 5
            ELAPSED=$((ELAPSED + 5))
            if [ $ELAPSED -ge $TIMEOUT ]; then
              echo "Timeout waiting for test result file"
              exit 1
            fi
          done

          # surrounding whitespace is not part of the exit code
          RET=$(tr -d '[:space:]' < "$RESULT_FILE")
          echo "Test result: $RET"

          # An empty or non-numeric value must not be mistaken for success:
          # `[ "$RET" -ne 0 ]` would error out and fall into the success branch.
          if ! [[ "$RET" =~ ^-?[0-9]+$ ]]; then
            echo "Test failed: invalid test result '$RET'"
            exit 1
          fi

          if [ "$RET" -ne 0 ]; then
            echo "Test failed"
            exit 1
          else
            echo "Test succeeded"
          fi

      - name: Post process
        if: always()
        run: |
          # listing pods is informational only and must not prevent the cleanup
          kubectl get pods -n "$NAMESPACE" || true

          # lws.yaml is missing when the job failed before rendering it
          if [ -f ./lws.yaml ]; then
            kubectl delete -f ./lws.yaml --ignore-not-found
          fi
|
||||
125
.github/workflows/multi_node_test.yaml
vendored
125
.github/workflows/multi_node_test.yaml
vendored
@@ -1,125 +0,0 @@
|
||||
name: 'e2e test / multi-dp'

on:
  workflow_dispatch:
  schedule:
    # minute 0 of every 4th hour
    - cron: "0 */4 * * *"

# Every `run` step uses a login shell ("bash -el {0}"): plain bash shells do
# not read ~/.profile or ~/.bashrc, and the login shell is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Runs that share the same workflow name and git ref form one concurrency
# group; a newer run cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
|
||||
|
||||
jobs:
  # Renders the LeaderWorkerSet manifest, applies it with kubectl, then streams
  # the leader pod logs while watching the pod phase.
  e2e:
    # This is a runner with no NPU for k8s controller
    runs-on: linux-aarch64-a3-0
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      # NOTE(review): indentation was lost in the reviewed source; `env` is
      # placed under `container` here - confirm against the real file.
      env:
        KUBECONFIG: /tmp/kubeconfig
        KUBECTL: /root/.cache/.kube/kubectl
        NAMESPACE: vllm-project
        LEADER_POD: vllm-0
    steps:
      - name: Install system dependencies
        run: |
          # configure apt and pip source
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
          # `pip install` has no -y option (installs never prompt)
          pip install jinja2-cli

          apt-get update -y && apt-get install -y git curl

          TOKEN=`echo -n "x-access-token:${{ secrets.ADMIN_PTA }}" | base64`
          git config --global http.https://gh-proxy.test.osinfra.cn/.extraheader "AUTHORIZATION: basic $TOKEN"

      - name: Install kubectl
        run: |
          install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl

          # get kubeconfig from secret
          echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Prepare scripts
        run: |
          # prepare for lws entrypoint scripts
          install -D tests/e2e/multi_node/scripts/run.sh /root/.cache/tests/run.sh

      - name: Launch cluster
        run: |
          jinja2 tests/e2e/multi_node/scripts/lws.yaml.jinja2 \
            -D size=2 \
            -D replicas=1 \
            -D image="m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11" \
            --outfile lws.yaml

          kubectl apply -f ./lws.yaml

      - name: Waiting for pod ready
        run: |
          echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."

          while true; do
            # get pod status
            READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}')

            if [[ "$READY_STATUS" == "true" ]]; then
              echo "✅ Pod [$LEADER_POD] is Ready!"
              break
            else
              echo "Pod [$LEADER_POD] not ready, waiting..."
              sleep 3
            fi
          done

      - name: Stream logs and monitor pod health
        run: |
          set -euo pipefail

          echo "🚀 Start streaming logs for Pod [$LEADER_POD] ..."
          kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" &
          LOG_PID=$!

          echo "Start monitoring Pod [$LEADER_POD] status ..."
          # The monitor runs in the foreground: an `exit 1` inside a background
          # subshell would only end that subshell and never fail the step.
          while true; do
            # remember whether the log stream is still attached before polling,
            # so the phase is checked one last time after the stream has ended
            STREAMING=1
            kill -0 "$LOG_PID" 2>/dev/null || STREAMING=0

            STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}')
            if [[ "$STATUS" != "Running" && "$STATUS" != "Succeeded" ]]; then
              echo "❌ Pod [$LEADER_POD] exited abnormally with status: $STATUS"
              kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
              kubectl logs "$LEADER_POD" -n "$NAMESPACE" --previous --all-containers || true
              kill $LOG_PID || true
              exit 1
            fi

            if [ "$STREAMING" -eq 0 ]; then
              break
            fi
            sleep 5
          done

          # reap the log streamer; its exit status does not decide the result
          wait $LOG_PID || true

      - name: Generate summary
        if: always()
        run: |
          if [ -f "/root/.cache/test_summary.md" ]; then
            cat /root/.cache/test_summary.md >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No summary file found." >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Post process
        if: always()
        run: |
          kubectl get pods -n $NAMESPACE
          kubectl delete -f ./lws.yaml
|
||||
133
.github/workflows/vllm_ascend_test_nightly.yaml
vendored
133
.github/workflows/vllm_ascend_test_nightly.yaml
vendored
@@ -1,133 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
name: 'ascend test / nightly'

on:
  workflow_dispatch:
  schedule:
    # Run test at 24:00 Beijing time (UTC+8)
    - cron: "0 16 * * *"
  pull_request:
    types: [labeled, opened, synchronize]
    branches:
      - 'main'
      - '*-dev'

# Every `run` step uses a login shell ("bash -el {0}"): plain bash shells do
# not read ~/.profile or ~/.bashrc, and the login shell is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Runs for the same git ref share one concurrency group. Cancelling the run
# in progress is currently switched off (the setting below is commented out).
concurrency:
  group: ascend-nightly-${{ github.ref }}
  # cancel-in-progress: true
|
||||
|
||||
# Each job hands one nightly test file to the reusable workflow
# _e2e_nightly.yaml on the runner named in its matrix.
#
# Gate: pull requests run a job only when they carry the 'run-nightly' label.
# Scheduled and manually dispatched runs have no pull_request payload, so a
# label-only condition would skip every job for them; they are let through
# explicitly.
jobs:
  qwen3-32b:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        # should add A3 chip runner when available
        os: [linux-aarch64-a2-4]
    # Note (yikun): If CI resource are limited we can split job into two chain jobs
    # only trigger e2e test after lint passed and the change is e2e related with pull request.
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      tests: tests/e2e/nightly/models/test_qwen3_32b.py

  qwen3-32b-in8-a3:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a3-4]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py

  qwen3-32b-in8-a2:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a2-4]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py

  qwen3-235b-a22b-w8a8-eplb:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a3-16]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py

  deepseek-r1-w8a8-eplb:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a3-16]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py

  qwen3-32b-int8-a3-feature-stack3:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a3-4]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py

  qwen2-5-vl-7b:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a3-4]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: tests/e2e/nightly/models/test_qwen2_5_vl_7b.py

  deepseek-r1-0528-w8a8:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-nightly')
    strategy:
      matrix:
        os: [linux-aarch64-a3-16]
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py
|
||||
60
.github/workflows/vllm_ascend_test_nightly_a2.yaml
vendored
Normal file
60
.github/workflows/vllm_ascend_test_nightly_a2.yaml
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
# Nightly workflow for the Atlas 800 A2 resources.
# Job concurrency on A2 is not limited.
name: 'ascend test / nightly-a2'

on:
  workflow_dispatch:
  schedule:
    # Run test at 24:00 Beijing time (UTC+8)
    - cron: "0 16 * * *"
  pull_request:
    branches:
      - 'main'

# Every `run` step uses a login shell ("bash -el {0}"): plain bash shells do
# not read ~/.profile or ~/.bashrc, and the login shell is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Runs for the same git ref share one group; a newer run cancels the one that
# is still in progress.
concurrency:
  group: ascend-nightly-${{ github.ref }}-a2
  cancel-in-progress: true
|
||||
|
||||
jobs:
  # Runs only for scheduled and manually dispatched events. Each matrix entry
  # passes one test file and its runner label to the reusable single-node
  # workflow.
  single-node-tests:
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    uses: ./.github/workflows/_e2e_nightly_single_node.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.test_config.os }}
      tests: ${{ matrix.test_config.tests }}
    strategy:
      # one failing entry does not cancel the remaining entries
      fail-fast: false
      matrix:
        test_config:
          - name: qwen3-32b
            tests: tests/e2e/nightly/models/test_qwen3_32b.py
            os: linux-aarch64-a2-4
          - name: qwen3-32b-in8-a2
            tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py
            os: linux-aarch64-a2-4
|
||||
98
.github/workflows/vllm_ascend_test_nightly_a3.yaml
vendored
Normal file
98
.github/workflows/vllm_ascend_test_nightly_a3.yaml
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
|
||||
# Nightly workflow for the Atlas 800 A3 resources.
# **Please note**: the current A3 resource pool allows at most 5*16 NPUs in
# parallel, so job concurrency on A3 is limited to avoid running out of
# resources.
name: 'ascend test / nightly-a3'

on:
  workflow_dispatch:
  schedule:
    # Run test at 24:00 Beijing time (UTC+8)
    - cron: "0 16 * * *"
  pull_request:
    branches:
      - 'main'

# Every `run` step uses a login shell ("bash -el {0}"): plain bash shells do
# not read ~/.profile or ~/.bashrc, and the login shell is what activates the
# ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Runs for the same git ref share one group; a newer run cancels the one that
# is still in progress.
concurrency:
  group: ascend-nightly-${{ github.ref }}-a3
  cancel-in-progress: true
|
||||
|
||||
jobs:
  # Runs only for scheduled and manually dispatched events. Each matrix entry
  # passes one test file and its runner label to the reusable single-node
  # workflow.
  single-node-tests:
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    strategy:
      fail-fast: false
      matrix:
        test_config:
          - name: qwen3-32b-in8-a3
            os: linux-aarch64-a3-4
            tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py
          - name: qwen3-32b-int8-a3-feature-stack3
            os: linux-aarch64-a3-4
            tests: tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py
          - name: qwen3-235b-a22b-w8a8-eplb
            os: linux-aarch64-a3-16
            tests: tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py
          - name: deepseek-r1-w8a8-eplb
            os: linux-aarch64-a3-16
            tests: tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py
          - name: qwen2-5-vl-7b
            os: linux-aarch64-a3-4
            tests: tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
          - name: deepseek-r1-0528-w8a8
            os: linux-aarch64-a3-16
            tests: tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py
    uses: ./.github/workflows/_e2e_nightly_single_node.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.test_config.os }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      tests: ${{ matrix.test_config.tests }}

  # Starts after single-node-tests has finished, whatever its result
  # (`always()`), and runs the multi-node configurations one at a time
  # (`max-parallel: 1`).
  multi-node-tests:
    needs: single-node-tests
    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
        test_config:
          # `size` is written as a string because the reusable workflow
          # declares its `size` input with `type: string`.
          - name: multi-node-deepseek-pd
            config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml
            size: "2"
          - name: multi-node-qwen3-dp
            config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml
            size: "2"
          - name: multi-node-dpsk-4node-pd
            config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml
            size: "4"
    uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
    with:
      soc_version: a3
      image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
      replicas: "1"
      size: ${{ matrix.test_config.size }}
      config_file_path: ${{ matrix.test_config.config_file_path }}
    # The called workflow reads secrets.KUBECONFIG_B64; a called workflow only
    # receives secrets that its caller passes on.
    secrets: inherit
|
||||
Reference in New Issue
Block a user