This PR upgrades CANN from 8.2rc1 to 8.3rc1 and removes the CANN version
check logic.
TODO: We noticed that the UT runs fail with the CANN 8.3 image, so the base
image for UT is still 8.2. We'll fix it later.
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
133 lines
3.9 KiB
YAML
133 lines
3.9 KiB
YAML
# LeaderWorkerSet "vllm": one replica group of two pods (size: 2).
# The leader and worker pod templates use the same image, environment,
# command, resources and volumes; only the leader carries the `role: leader`
# label and exposes container port 8080.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: vllm
  namespace: vllm-project
spec:
  replicas: 1
  leaderWorkerTemplate:
    size: 2
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        containers:
          - name: vllm-leader
            image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
            env:
              - name: WORKSPACE
                value: "/root/workspace"
              # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "main"
              - name: MOONCAKE_VERSION
                value: "06cc217504a6f1b0cdaa26b096b985651b262748"
            # Run the test entrypoint from the shared cache volume, then block
            # on `tail -f /dev/null` so the container does not exit afterwards.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
                tail -f /dev/null
            resources:
              limits:
                huawei.com/ascend-1980: "16"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "16"
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              - containerPort: 8080
            # Readiness probe is currently disabled.
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache; the command above reads tests/run.sh from it.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Host directory with the Ascend driver tools, mounted at the same path.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
            env:
              - name: WORKSPACE
                value: "/root/workspace"
              # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "main"
              - name: MOONCAKE_VERSION
                value: "06cc217504a6f1b0cdaa26b096b985651b262748"
            # Same entrypoint as the leader: run the test script, then idle.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
                tail -f /dev/null
            resources:
              limits:
                huawei.com/ascend-1980: "16"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "16"
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service "vllm-leader": forwards TCP port 8080 to port 8080 on
# pods matching both selector labels below.
apiVersion: v1
kind: Service
metadata:
  name: vllm-leader
  namespace: vllm-project
spec:
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    # NOTE(review): presumably this label is applied by the LeaderWorkerSet
    # controller to pods of the set named "vllm" — confirm against LWS docs.
    leaderworkerset.sigs.k8s.io/name: vllm
    role: leader
  type: ClusterIP