[CI] Move nightly-a2 test to hk (#5807)
### What this PR does / why we need it?
This patch initial testing involved connecting two nodes from the HK
region to nightly A2.
- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
138
tests/e2e/nightly/multi_node/scripts/lws-a2.yaml.jinja2
Normal file
138
tests/e2e/nightly/multi_node/scripts/lws-a2.yaml.jinja2
Normal file
@@ -0,0 +1,138 @@
|
||||
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||
kind: LeaderWorkerSet
|
||||
metadata:
|
||||
name: vllm
|
||||
namespace: vllm-project
|
||||
spec:
|
||||
replicas: {{ replicas | default(1) }}
|
||||
leaderWorkerTemplate:
|
||||
size: {{ size | default(2) }}
|
||||
restartPolicy: None
|
||||
leaderTemplate:
|
||||
metadata:
|
||||
labels:
|
||||
role: leader
|
||||
spec:
|
||||
schedulerName: volcano
|
||||
tolerations:
|
||||
- key: "instance"
|
||||
operator: "Equal"
|
||||
value: "vllm"
|
||||
effect: "NoSchedule"
|
||||
containers:
|
||||
- name: vllm-leader
|
||||
imagePullPolicy: Always
|
||||
image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2") }}
|
||||
env:
|
||||
- name: CONFIG_YAML_PATH
|
||||
value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
|
||||
- name: WORKSPACE
|
||||
value: "/vllm-workspace"
|
||||
- name: FAIL_TAG
|
||||
value: {{ fail_tag | default("FAIL_TAG") }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
bash /root/.cache/tests/run.sh
|
||||
resources:
|
||||
limits:
|
||||
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
|
||||
memory: 512Gi
|
||||
ephemeral-storage: 100Gi
|
||||
requests:
|
||||
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
|
||||
ephemeral-storage: 100Gi
|
||||
cpu: 125
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
# readinessProbe:
|
||||
# tcpSocket:
|
||||
# port: 8080
|
||||
# initialDelaySeconds: 15
|
||||
# periodSeconds: 10
|
||||
volumeMounts:
|
||||
- mountPath: /root/.cache
|
||||
name: shared-volume
|
||||
- mountPath: /usr/local/Ascend/driver/tools
|
||||
name: driver-tools
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 15Gi
|
||||
- name: shared-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: vllm-project-hk001
|
||||
- name: driver-tools
|
||||
hostPath:
|
||||
path: /usr/local/Ascend/driver/tools
|
||||
workerTemplate:
|
||||
spec:
|
||||
schedulerName: volcano
|
||||
tolerations:
|
||||
- key: "instance"
|
||||
operator: "Equal"
|
||||
value: "vllm"
|
||||
effect: "NoSchedule"
|
||||
containers:
|
||||
- name: vllm-worker
|
||||
imagePullPolicy: Always
|
||||
image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2") }}
|
||||
env:
|
||||
- name: CONFIG_YAML_PATH
|
||||
value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
|
||||
- name: WORKSPACE
|
||||
value: "/vllm-workspace"
|
||||
- name: FAIL_TAG
|
||||
value: {{ fail_tag | default("FAIL_TAG") }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
bash /root/.cache/tests/run.sh
|
||||
resources:
|
||||
limits:
|
||||
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
|
||||
memory: 512Gi
|
||||
ephemeral-storage: 100Gi
|
||||
requests:
|
||||
huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
|
||||
ephemeral-storage: 100Gi
|
||||
cpu: 125
|
||||
volumeMounts:
|
||||
- mountPath: /root/.cache
|
||||
name: shared-volume
|
||||
- mountPath: /usr/local/Ascend/driver/tools
|
||||
name: driver-tools
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
volumes:
|
||||
- name: dshm
|
||||
emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 15Gi
|
||||
- name: shared-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: vllm-project-hk001
|
||||
- name: driver-tools
|
||||
hostPath:
|
||||
path: /usr/local/Ascend/driver/tools
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vllm-leader
|
||||
namespace: vllm-project
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
protocol: TCP
|
||||
targetPort: 8080
|
||||
selector:
|
||||
leaderworkerset.sigs.k8s.io/name: vllm
|
||||
role: leader
|
||||
type: ClusterIP
|
||||
Reference in New Issue
Block a user