---
# LeaderWorkerSet "vllm" in namespace "vllm-project".
# Deploys one replica group of `size: 2` pods (leader template + worker
# template). Both templates use the same CANN image, env, resources and
# volumes; only the leader carries the `role: leader` label and declares
# container port 8080.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: vllm
  namespace: vllm-project
spec:
  replicas: 1
  leaderWorkerTemplate:
    size: 2
    restartPolicy: RecreateGroupOnPodRestart
    leaderTemplate:
      metadata:
        labels:
          # Matched by the `vllm-leader` Service selector below.
          role: leader
      spec:
        containers:
          - name: vllm-leader
            image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
            env:
              - name: WORKSPACE
                value: "/root/workspace"
              # Set the vLLM and vLLM-Ascend versions here; update them
              # whenever a new release is published. Keep them in sync
              # with the worker template.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "main"
              - name: MOONCAKE_VERSION
                value: "06cc217504a6f1b0cdaa26b096b985651b262748"
            command:
              - sh
              - -c
              # Run the test script from the shared volume, then block on
              # `tail -f /dev/null` so the container stays up after the
              # script exits (regardless of its exit status).
              - |
                bash /root/.cache/tests/run.sh
                tail -f /dev/null
            resources:
              limits:
                huawei.com/ascend-1980: "16"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "16"
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              - containerPort: 8080
            # Readiness probe is intentionally left disabled.
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm, capped at 15Gi.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache; run.sh is read from it.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Host directory with the Ascend driver tools, mounted at the
          # same path inside the container.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
            env:
              - name: WORKSPACE
                value: "/root/workspace"
              # Set the vLLM and vLLM-Ascend versions here; update them
              # whenever a new release is published. Keep them in sync
              # with the leader template.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "main"
              - name: MOONCAKE_VERSION
                value: "06cc217504a6f1b0cdaa26b096b985651b262748"
            command:
              - sh
              - -c
              # Same entry point as the leader: run the test script, then
              # keep the container alive.
              - |
                bash /root/.cache/tests/run.sh
                tail -f /dev/null
            resources:
              limits:
                huawei.com/ascend-1980: "16"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "16"
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm, capped at 15Gi.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # PVC mounted at /root/.cache; run.sh is read from it.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Host directory with the Ascend driver tools, mounted at the
          # same path inside the container.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service "vllm-leader": forwards TCP port 8080 to port 8080 on
# pods carrying both selector labels below.
apiVersion: v1
kind: Service
metadata:
  name: vllm-leader
  namespace: vllm-project
spec:
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    # NOTE(review): this label is expected to be applied to the pods by the
    # LeaderWorkerSet controller, not by this manifest — confirm against the
    # installed LWS version.
    leaderworkerset.sigs.k8s.io/name: vllm
    role: leader
  type: ClusterIP