[CI] Multi-Node CI scalable (#3611)
### What this PR does / why we need it?
This PR adds a Jinja2 template for the k8s configuration file, in preparation for the upcoming 4-node CI.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
11
.github/workflows/multi_node_test.yaml
vendored
11
.github/workflows/multi_node_test.yaml
vendored
@@ -35,6 +35,7 @@ jobs:
|
|||||||
# configure apt and pip source
|
# configure apt and pip source
|
||||||
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
||||||
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
||||||
|
# Install the jinja2 CLI used later to render lws.yaml.jinja2.
# Note: `pip install` has no -y/--yes option (only `pip uninstall` does);
# passing it aborts with "no such option: -y", so it is omitted here.
pip install jinja2-cli
|
||||||
|
|
||||||
apt-get update -y && apt-get install -y git curl
|
apt-get update -y && apt-get install -y git curl
|
||||||
|
|
||||||
@@ -58,7 +59,13 @@ jobs:
|
|||||||
|
|
||||||
- name: Launch cluster
|
- name: Launch cluster
|
||||||
run: |
|
run: |
|
||||||
kubectl apply -f tests/e2e/multi_node/scripts/lws.yaml
|
jinja2 tests/e2e/multi_node/scripts/lws.yaml.jinja2 \
|
||||||
|
-D size=2 \
|
||||||
|
-D replicas=1 \
|
||||||
|
-D image="m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11" \
|
||||||
|
--outfile lws.yaml
|
||||||
|
|
||||||
|
kubectl apply -f ./lws.yaml
|
||||||
|
|
||||||
- name: Waiting for pod ready
|
- name: Waiting for pod ready
|
||||||
run: |
|
run: |
|
||||||
@@ -115,4 +122,4 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
kubectl get pods -n $NAMESPACE
|
kubectl get pods -n $NAMESPACE
|
||||||
kubectl delete -f tests/e2e/multi_node/scripts/lws.yaml
|
kubectl delete -f ./lws.yaml
|
||||||
|
|||||||
@@ -26,9 +26,7 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- 'main'
|
- 'main'
|
||||||
- '*-dev'
|
- '*-dev'
|
||||||
paths:
|
types: [labeled]
|
||||||
- 'tests/e2e/nightly/**'
|
|
||||||
- '.github/workflows/vllm_ascend_test_nightly.yaml'
|
|
||||||
|
|
||||||
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
||||||
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
||||||
@@ -45,6 +43,7 @@ concurrency:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
qwen3-32b:
|
qwen3-32b:
|
||||||
|
if: contains(github.event.pull_request.labels.*.name, 'run-nightly')
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
# should add A3 chip runner when available
|
# should add A3 chip runner when available
|
||||||
@@ -57,6 +56,7 @@ jobs:
|
|||||||
runner: ${{ matrix.os }}
|
runner: ${{ matrix.os }}
|
||||||
tests: tests/e2e/nightly/models/test_qwen3_32b.py
|
tests: tests/e2e/nightly/models/test_qwen3_32b.py
|
||||||
qwen3-32b-in8-a3:
|
qwen3-32b-in8-a3:
|
||||||
|
if: contains(github.event.pull_request.labels.*.name, 'run-nightly')
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ linux-aarch64-a3-4 ]
|
os: [ linux-aarch64-a3-4 ]
|
||||||
@@ -67,6 +67,7 @@ jobs:
|
|||||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
||||||
tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py
|
tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py
|
||||||
qwen3-32b-in8-a2:
|
qwen3-32b-in8-a2:
|
||||||
|
if: contains(github.event.pull_request.labels.*.name, 'run-nightly')
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ linux-aarch64-a2-4 ]
|
os: [ linux-aarch64-a2-4 ]
|
||||||
@@ -76,6 +77,7 @@ jobs:
|
|||||||
runner: ${{ matrix.os }}
|
runner: ${{ matrix.os }}
|
||||||
tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py
|
tests: tests/e2e/nightly/models/test_qwen3_32b_int8.py
|
||||||
qwen3-235b-a22b-w8a8-eplb:
|
qwen3-235b-a22b-w8a8-eplb:
|
||||||
|
if: contains(github.event.pull_request.labels.*.name, 'run-nightly')
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
# should add A3 chip runner when available
|
# should add A3 chip runner when available
|
||||||
@@ -89,6 +91,7 @@ jobs:
|
|||||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
||||||
tests: tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py
|
tests: tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py
|
||||||
deepseek-r1-w8a8-eplb:
|
deepseek-r1-w8a8-eplb:
|
||||||
|
if: contains(github.event.pull_request.labels.*.name, 'run-nightly')
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
# should add A3 chip runner when available
|
# should add A3 chip runner when available
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ metadata:
|
|||||||
name: vllm
|
name: vllm
|
||||||
namespace: vllm-project
|
namespace: vllm-project
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: {{ replicas | default(1) }}
|
||||||
leaderWorkerTemplate:
|
leaderWorkerTemplate:
|
||||||
size: 2
|
size: {{ size | default(2) }}
|
||||||
restartPolicy: RecreateGroupOnPodRestart
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
leaderTemplate:
|
leaderTemplate:
|
||||||
metadata:
|
metadata:
|
||||||
@@ -15,7 +15,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: vllm-leader
|
- name: vllm-leader
|
||||||
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
|
||||||
env:
|
env:
|
||||||
- name: WORKSPACE
|
- name: WORKSPACE
|
||||||
value: "/root/workspace"
|
value: "/root/workspace"
|
||||||
@@ -24,8 +24,6 @@ spec:
|
|||||||
value: "v0.11.0"
|
value: "v0.11.0"
|
||||||
- name: VLLM_ASCEND_VERSION
|
- name: VLLM_ASCEND_VERSION
|
||||||
value: "main"
|
value: "main"
|
||||||
- name: MOONCAKE_VERSION
|
|
||||||
value: "06cc217504a6f1b0cdaa26b096b985651b262748"
|
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
@@ -70,7 +68,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: vllm-worker
|
- name: vllm-worker
|
||||||
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
|
||||||
env:
|
env:
|
||||||
- name: WORKSPACE
|
- name: WORKSPACE
|
||||||
value: "/root/workspace"
|
value: "/root/workspace"
|
||||||
@@ -79,8 +77,6 @@ spec:
|
|||||||
value: "v0.11.0"
|
value: "v0.11.0"
|
||||||
- name: VLLM_ASCEND_VERSION
|
- name: VLLM_ASCEND_VERSION
|
||||||
value: "main"
|
value: "main"
|
||||||
- name: MOONCAKE_VERSION
|
|
||||||
value: "06cc217504a6f1b0cdaa26b096b985651b262748"
|
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
Reference in New Issue
Block a user