### What this PR does / why we need it?
Our nightly CI currently takes excessively long to build. To reduce the
time spent on integration testing, I recommend pre-installing the
required packages, at confirmed versions, in the Docker image. This
includes Mooncake and vLLM with fixed tags. The change is expected to
reduce the nightly CI duration by about 2 hours.
- vLLM version: v0.11.0
- vLLM main: 2918c1b49c
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
139 lines
4.8 KiB
Django/Jinja
139 lines
4.8 KiB
Django/Jinja
# Jinja2 template for the multi-node vLLM job, expressed as a LeaderWorkerSet
# with one leader pod template and one worker pod template.
#
# Template variables (the default applies when the variable is undefined):
#   replicas                number of leader/worker groups      (default 1)
#   size                    pods per group                      (default 2)
#   image                   container image for leader and worker
#   config_file_path        exported as CONFIG_YAML_PATH
#   vllm_ascend_ref         exported as VLLM_ASCEND_VERSION     (default main)
#   vllm_ascend_remote_url  exported as VLLM_ASCEND_REMOTE_URL
#   result_file_path        exported as RESULT_FILE_PATH
#   fail_tag                exported as FAIL_TAG
#   npu_per_node            huawei.com/ascend-1980 devices per pod (default 16)
#
# Templated string values are quoted so that a value which looks like a
# number or boolean (for example an all-digit commit SHA passed as
# vllm_ascend_ref) is still rendered as a YAML string; Kubernetes rejects
# non-string env values. replicas and size stay unquoted because they must
# render as integers.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: vllm
  namespace: vllm-project
spec:
  replicas: {{ replicas | default(1) }}
  leaderWorkerTemplate:
    size: {{ size | default(2) }}
    restartPolicy: None
    leaderTemplate:
      metadata:
        labels:
          # Lets a Service select only the leader pod of the group.
          role: leader
      spec:
        containers:
          - name: vllm-leader
            image: "{{ image | default('swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3') }}"
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              # Set vLLM version and vLLM-Ascend version here; update them
              # once there is a new release.
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default('main') }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default('https://github.com/vllm-project/vllm-ascend.git') }}"
              # NOTE(review): this default has no file name, while the worker
              # template defaults to /root/.cache/tests/ret/test_result.txt.
              # Confirm which form run.sh expects and align the two.
              - name: RESULT_FILE_PATH
                value: "{{ result_file_path | default('/root/.cache/tests/ret') }}"
              - name: FAIL_TAG
                value: "{{ fail_tag | default('FAIL_TAG') }}"
            # The entry script is read from the shared volume mounted at
            # /root/.cache (see volumeMounts below).
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              - containerPort: 8080
            # Readiness probe is currently disabled.
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed /dev/shm, capped at 15Gi.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache in the container.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Ascend driver tools taken from the host at the same path.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: "{{ image | default('swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3') }}"
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              # Set vLLM version and vLLM-Ascend version here; update them
              # once there is a new release.
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default('main') }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default('https://github.com/vllm-project/vllm-ascend.git') }}"
              # NOTE(review): this default names a file, while the leader
              # template defaults to /root/.cache/tests/ret. Confirm which
              # form run.sh expects and align the two.
              - name: RESULT_FILE_PATH
                value: "{{ result_file_path | default('/root/.cache/tests/ret/test_result.txt') }}"
              - name: FAIL_TAG
                value: "{{ fail_tag | default('FAIL_TAG') }}"
            # Same entry script as the leader, read from the shared volume.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed /dev/shm, capped at 15Gi.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache in the container.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Ascend driver tools taken from the host at the same path.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service exposing TCP port 8080 of the pod(s) that carry both the
# LeaderWorkerSet name label for "vllm" and the role=leader label.
apiVersion: v1
kind: Service
metadata:
  namespace: vllm-project
  name: vllm-leader
spec:
  type: ClusterIP
  selector:
    role: leader
    leaderworkerset.sigs.k8s.io/name: vllm
  ports:
    - name: http
      protocol: TCP
      port: 8080
      targetPort: 8080