Files
xc-llm-ascend/tests/e2e/nightly/multi_node/scripts/lws.yaml
wangxiyuan 7ee0b0b5d8 [cherry-pick]Upgrade CANN to 8.3.rc1 (#3945) (#3962)
This PR upgrades CANN from 8.2rc1 to 8.3rc1 and removes the CANN version
check logic.

TODO: We noticed that the UT runs fail with the CANN 8.3 image, so the base
image for UT is still 8.2. We'll fix it later.

- vLLM version: v0.11.0
- vLLM main: 83f478bb19

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-11-06 09:05:08 +08:00

133 lines
3.9 KiB
YAML

---
# LeaderWorkerSet used by the multi-node test scripts: a single group made of
# one leader pod and one worker pod. Both containers run the same shared test
# script and then idle.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: vllm
  namespace: vllm-project
spec:
  replicas: 1
  leaderWorkerTemplate:
    # Pods per group, leader included (1 leader + 1 worker).
    size: 2
    # Recreate the whole group when a pod in it restarts.
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          # Matched by the selector of the vllm-leader Service in this file.
          role: leader
      spec:
        containers:
          - name: vllm-leader
            image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
            env:
              - name: WORKSPACE
                value: "/root/workspace"
              # Pin the vLLM and vLLM-Ascend versions here; update them
              # whenever there is a new release.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "main"
              - name: MOONCAKE_VERSION
                value: "06cc217504a6f1b0cdaa26b096b985651b262748"
            # Run the test script from the shared /root/.cache volume, then
            # block on `tail -f /dev/null` so the container keeps running
            # after the script returns.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
                tail -f /dev/null
            resources:
              limits:
                # Extended resource; presumably 16 Ascend NPUs per pod.
                huawei.com/ascend-1980: "16"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "16"
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              # Same port the vllm-leader Service targets.
              - containerPort: 8080
            # Readiness probe intentionally left disabled.
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache; holds tests/run.sh.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Host directory exposed at the same path inside the container.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
            env:
              - name: WORKSPACE
                value: "/root/workspace"
              # Pin the vLLM and vLLM-Ascend versions here; update them
              # whenever there is a new release.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "main"
              - name: MOONCAKE_VERSION
                value: "06cc217504a6f1b0cdaa26b096b985651b262748"
            # Same entrypoint as the leader: run the shared test script,
            # then block so the container keeps running.
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
                tail -f /dev/null
            resources:
              limits:
                # Extended resource; presumably 16 Ascend NPUs per pod.
                huawei.com/ascend-1980: "16"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "16"
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache; holds tests/run.sh.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Host directory exposed at the same path inside the container.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service that exposes TCP port 8080 of the pods selected below:
# pods carrying the LeaderWorkerSet name label for "vllm" and `role: leader`.
apiVersion: v1
kind: Service
metadata:
  name: vllm-leader
  namespace: vllm-project
spec:
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    # Presumably applied by the LWS controller to pods of the set "vllm".
    leaderworkerset.sigs.k8s.io/name: vllm
    # Label defined in the leaderTemplate of the LeaderWorkerSet above.
    role: leader
  type: ClusterIP