---
# LeaderWorkerSet running a distributed vLLM deployment on Ascend NPUs.
# One leader pod plus (size - 1) worker pods per replica group; all pods
# run the same test entrypoint (/root/.cache/tests/run.sh) from a shared PVC.
# Values in {{ ... }} are Jinja template variables; all string-valued
# substitutions are quoted so an empty/boolean-looking/special-leading
# expansion cannot corrupt the YAML parse.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: vllm
  namespace: vllm-project
spec:
  # Number of leader+worker groups; must render as an integer, so unquoted.
  replicas: {{ replicas | default(1) }}
  leaderWorkerTemplate:
    # Total pods per group (1 leader + size-1 workers); integer, so unquoted.
    size: {{ size | default(2) }}
    restartPolicy: None
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        containers:
          - name: vllm-leader
            image: "{{ image | default('m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11') }}"
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
              - name: WORKSPACE
                value: "/root/workspace"
              # Set vLLM version and vLLM-Ascend version here; once there is
              # a new release, update here.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default('main') }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default('https://github.com/vllm-project/vllm-ascend.git') }}"
              - name: RESULT_FILE_PATH
                # NOTE(review): leader default is '/root/.cache/tests/ret' while the
                # worker default below is '/root/.cache/tests/ret/test_result.txt' —
                # confirm which path run.sh expects; originals preserved as-is.
                value: "{{ result_file_path | default('/root/.cache/tests/ret') }}"
              - name: FAIL_TAG
                value: "{{ fail_tag | default('FAIL_TAG') }}"
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                # Resource quantities are quoted: a quantity rendered as a bare
                # templated scalar risks integer/float retyping by the parser.
                huawei.com/ascend-1980: "{{ npu_per_node | default('16') }}"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "{{ npu_per_node | default('16') }}"
                ephemeral-storage: 100Gi
                cpu: 125
            ports:
              - containerPort: 8080
            # readinessProbe:
            #   tcpSocket:
            #     port: 8080
            #   initialDelaySeconds: 15
            #   periodSeconds: 10
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
    workerTemplate:
      spec:
        containers:
          - name: vllm-worker
            image: "{{ image | default('m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11') }}"
            env:
              - name: CONFIG_YAML_PATH
                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
              - name: WORKSPACE
                value: "/root/workspace"
              # Set vLLM version and vLLM-Ascend version here; once there is
              # a new release, update here.
              - name: VLLM_VERSION
                value: "v0.11.0"
              - name: VLLM_ASCEND_VERSION
                value: "{{ vllm_ascend_ref | default('main') }}"
              - name: VLLM_ASCEND_REMOTE_URL
                value: "{{ vllm_ascend_remote_url | default('https://github.com/vllm-project/vllm-ascend.git') }}"
              - name: RESULT_FILE_PATH
                value: "{{ result_file_path | default('/root/.cache/tests/ret/test_result.txt') }}"
              - name: FAIL_TAG
                value: "{{ fail_tag | default('FAIL_TAG') }}"
            command:
              - sh
              - -c
              - |
                bash /root/.cache/tests/run.sh
            resources:
              limits:
                huawei.com/ascend-1980: "{{ npu_per_node | default('16') }}"
                memory: 512Gi
                ephemeral-storage: 100Gi
              requests:
                huawei.com/ascend-1980: "{{ npu_per_node | default('16') }}"
                ephemeral-storage: 100Gi
                cpu: 125
            volumeMounts:
              - mountPath: /root/.cache
                name: shared-volume
              - mountPath: /usr/local/Ascend/driver/tools
                name: driver-tools
              - mountPath: /dev/shm
                name: dshm
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
              sizeLimit: 15Gi
          - name: shared-volume
            persistentVolumeClaim:
              claimName: nv-action-vllm-benchmarks-v2
          - name: driver-tools
            hostPath:
              path: /usr/local/Ascend/driver/tools
---
# ClusterIP Service exposing the leader pod's HTTP port (8080).
# Selects the leader via the LWS-managed name label plus the role label
# set on the leader template above.
apiVersion: v1
kind: Service
metadata:
  name: vllm-leader
  namespace: vllm-project
spec:
  ports:
    - name: http
      port: 8080
      protocol: TCP
      targetPort: 8080
  selector:
    leaderworkerset.sigs.k8s.io/name: vllm
    role: leader
  type: ClusterIP