[CI] Multi-Node CI scalable (#3611)

### What this PR does / why we need it?
This PR adds a Jinja template for the k8s configuration file, in preparation
for the upcoming 4-node CI.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-10-22 14:18:43 +08:00
committed by GitHub
parent bc30874f8b
commit 286ae9003d
3 changed files with 19 additions and 13 deletions

View File

@@ -4,9 +4,9 @@ metadata:
name: vllm
namespace: vllm-project
spec:
replicas: 1
replicas: {{ replicas | default(1) }}
leaderWorkerTemplate:
size: 2
size: {{ size | default(2) }}
restartPolicy: RecreateGroupOnPodRestart
leaderTemplate:
metadata:
@@ -15,7 +15,7 @@ spec:
spec:
containers:
- name: vllm-leader
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
env:
- name: WORKSPACE
value: "/root/workspace"
@@ -24,8 +24,6 @@ spec:
value: "v0.11.0"
- name: VLLM_ASCEND_VERSION
value: "main"
- name: MOONCAKE_VERSION
value: "06cc217504a6f1b0cdaa26b096b985651b262748"
command:
- sh
- -c
@@ -70,7 +68,7 @@ spec:
spec:
containers:
- name: vllm-worker
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }}
env:
- name: WORKSPACE
value: "/root/workspace"
@@ -79,8 +77,6 @@ spec:
value: "v0.11.0"
- name: VLLM_ASCEND_VERSION
value: "main"
- name: MOONCAKE_VERSION
value: "06cc217504a6f1b0cdaa26b096b985651b262748"
command:
- sh
- -c