[CI] Add multi-node test case for a2 (#3805)

### What this PR does / why we need it?
This patch add multi-node test case for a2
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main:
c9461e05a4

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-27 23:10:17 +08:00
committed by GitHub
parent 9030106a14
commit f846bd20e4
5 changed files with 97 additions and 8 deletions

View File

@@ -7,6 +7,10 @@ on:
required: true
type: string
description: use a2 or a3
runner:
required: false
type: string
default: linux-aarch64-a3-0
image:
required: false
type: string
@@ -62,7 +66,7 @@ concurrency:
jobs:
e2e:
# This is a runner with no NPU for k8s controller
runs-on: linux-aarch64-a3-0
runs-on: ${{ inputs.runner }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
env:
@@ -90,8 +94,7 @@ jobs:
kubectl version --client=true
# TODO: Add A2 tests
- name: Setup kubeconfig for A3
if: inputs.soc_version == 'a3'
- name: Decode kubeconfig from secrets
run: |
# Decode and save kubeconfig
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
@@ -129,6 +132,12 @@ jobs:
fi
done
if [ "${{ inputs.soc_version }}" = "a3" ]; then
npu_per_node=16
else
npu_per_node=8
fi
jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
-D size="$size" \
-D replicas="$replicas" \
@@ -138,6 +147,7 @@ jobs:
-D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
-D vllm_ascend_ref="$vllm_ascend_ref" \
-D result_file_path="$result_file_path" \
-D npu_per_node="$npu_per_node" \
--outfile lws.yaml
kubectl apply -f ./lws.yaml

View File

@@ -61,3 +61,25 @@ jobs:
vllm: v0.11.0
runner: ${{ matrix.test_config.os }}
tests: ${{ matrix.test_config.tests }}
multi-node-tests:
needs: single-node-tests
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
strategy:
fail-fast: false
max-parallel: 1
matrix:
test_config:
- name: multi-node-deepseek-dp
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
size: 2
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
with:
soc_version: a2
runner: linux-aarch64-a2-0
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
replicas: 1
size: ${{ matrix.test_config.size }}
config_file_path: ${{ matrix.test_config.config_file_path }}
secrets:
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_A2_B64 }}

View File

@@ -104,10 +104,10 @@ jobs:
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
with:
soc_version: a3
runner: linux-aarch64-a3-0
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
replicas: 1
size: ${{ matrix.test_config.size }}
config_file_path: ${{ matrix.test_config.config_file_path }}
secrets:
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}