# Jinja2 template for a multi-node vLLM run on Ascend NPUs.
#
# Renders two Kubernetes documents:
#   1. A LeaderWorkerSet named "vllm" whose leader and worker pods both run
#      /root/.cache/tests/run.sh from the shared persistent volume.
#   2. A ClusterIP Service "vllm-leader" exposing port 8080 of the leader pods.
#
# Template variables (all optional):
#   replicas          LeaderWorkerSet replicas                (default: 1)
#   size              LeaderWorkerSet group size              (default: 2)
#   image             container image for leader and workers
#   config_file_path  exported as CONFIG_YAML_PATH            (default: DeepSeek-V3.yaml)
#   fail_tag          exported as FAIL_TAG                    (default: FAIL_TAG)
#   npu_per_node      huawei.com/ascend-1980 devices per pod  (default: 16)
---
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: vllm
  namespace: vllm-project
spec:
  replicas: {{ replicas | default(1) }}
  leaderWorkerTemplate:
    size: {{ size | default(2) }}
    restartPolicy: None
    leaderTemplate:
      metadata:
        labels:
          # Matched by the selector of the vllm-leader Service below.
          role: leader
      spec:
        containers:
          - name: vllm-leader
            imagePullPolicy: Always
            image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }}
            env:
              # Templated env values are quoted so the rendered scalar is
              # always a YAML string; Kubernetes rejects non-string env values
              # (e.g. a tag that renders as 123 or true).
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              - name: FAIL_TAG
                value: "{{ fail_tag | default('FAIL_TAG') }}"
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              - containerPort: 8080
            # Readiness probe is currently disabled.
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          # Memory-backed emptyDir mounted at /dev/shm.
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          # Shared PVC mounted at /root/.cache; it provides tests/run.sh.
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          # Driver tools taken from the host at the same path.
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            imagePullPolicy: Always
            image: {{ image | default("swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3") }}
            env:
              # Quoted for the same reason as in the leader container.
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
              - name: WORKSPACE
                value: "/vllm-workspace"
              - name: FAIL_TAG
                value: "{{ fail_tag | default('FAIL_TAG') }}"
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: {{ npu_per_node | default("16") }}
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# Exposes port 8080 of the leader pods inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: vllm-leader
  namespace: vllm-project
spec:
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    leaderworkerset.sigs.k8s.io/name: vllm
    role: leader
  type: ClusterIP