Sync from v0.13
This commit is contained in:
6
examples/online_serving/chart-helm/.helmignore
Normal file
6
examples/online_serving/chart-helm/.helmignore
Normal file
@@ -0,0 +1,6 @@
|
||||
*.png
|
||||
.git/
|
||||
ct.yaml
|
||||
lintconf.yaml
|
||||
values.schema.json
|
||||
/workflows
|
||||
21
examples/online_serving/chart-helm/Chart.yaml
Normal file
21
examples/online_serving/chart-helm/Chart.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
apiVersion: v2
|
||||
name: chart-vllm
|
||||
description: Chart vllm
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.0.1
|
||||
|
||||
maintainers:
|
||||
- name: mfournioux
|
||||
33
examples/online_serving/chart-helm/README.md
Normal file
33
examples/online_serving/chart-helm/README.md
Normal file
@@ -0,0 +1,33 @@
|
||||
# Helm Charts
|
||||
|
||||
This directory contains a Helm chart for deploying the vLLM application. The chart includes configurations for deployment, autoscaling, resource management, and more.
|
||||
|
||||
## Files
|
||||
|
||||
- Chart.yaml: Defines the chart metadata including name, version, and maintainers.
|
||||
- ct.yaml: Configuration for chart testing.
|
||||
- lintconf.yaml: Linting rules for YAML files.
|
||||
- values.schema.json: JSON schema for validating values.yaml.
|
||||
- values.yaml: Default values for the Helm chart.
|
||||
- templates/_helpers.tpl: Helper templates for defining common configurations.
|
||||
- templates/configmap.yaml: Template for creating ConfigMaps.
|
||||
- templates/custom-objects.yaml: Template for custom Kubernetes objects.
|
||||
- templates/deployment.yaml: Template for creating Deployments.
|
||||
- templates/hpa.yaml: Template for Horizontal Pod Autoscaler.
|
||||
- templates/job.yaml: Template for Kubernetes Jobs.
|
||||
- templates/poddisruptionbudget.yaml: Template for Pod Disruption Budget.
|
||||
- templates/pvc.yaml: Template for Persistent Volume Claims.
|
||||
- templates/secrets.yaml: Template for Kubernetes Secrets.
|
||||
- templates/service.yaml: Template for creating Services.
|
||||
|
||||
## Running Tests
|
||||
|
||||
This chart includes unit tests using [helm-unittest](https://github.com/helm-unittest/helm-unittest). Install the plugin and run tests:
|
||||
|
||||
```bash
|
||||
# Install plugin
|
||||
helm plugin install https://github.com/helm-unittest/helm-unittest
|
||||
|
||||
# Run tests
|
||||
helm unittest .
|
||||
```
|
||||
3
examples/online_serving/chart-helm/ct.yaml
Normal file
3
examples/online_serving/chart-helm/ct.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
chart-dirs:
|
||||
- charts
|
||||
validate-maintainers: false
|
||||
42
examples/online_serving/chart-helm/lintconf.yaml
Normal file
42
examples/online_serving/chart-helm/lintconf.yaml
Normal file
@@ -0,0 +1,42 @@
|
||||
---
|
||||
rules:
|
||||
braces:
|
||||
min-spaces-inside: 0
|
||||
max-spaces-inside: 0
|
||||
min-spaces-inside-empty: -1
|
||||
max-spaces-inside-empty: -1
|
||||
brackets:
|
||||
min-spaces-inside: 0
|
||||
max-spaces-inside: 0
|
||||
min-spaces-inside-empty: -1
|
||||
max-spaces-inside-empty: -1
|
||||
colons:
|
||||
max-spaces-before: 0
|
||||
max-spaces-after: 1
|
||||
commas:
|
||||
max-spaces-before: 0
|
||||
min-spaces-after: 1
|
||||
max-spaces-after: 1
|
||||
comments:
|
||||
require-starting-space: true
|
||||
min-spaces-from-content: 2
|
||||
document-end: disable
|
||||
document-start: disable # No --- to start a file
|
||||
empty-lines:
|
||||
max: 2
|
||||
max-start: 0
|
||||
max-end: 0
|
||||
hyphens:
|
||||
max-spaces-after: 1
|
||||
indentation:
|
||||
spaces: consistent
|
||||
indent-sequences: whatever # - list indentation will handle both indentation and without
|
||||
check-multi-line-strings: false
|
||||
key-duplicates: enable
|
||||
line-length: disable # Lines can be any length
|
||||
new-line-at-end-of-file: disable
|
||||
new-lines:
|
||||
type: unix
|
||||
trailing-spaces: enable
|
||||
truthy:
|
||||
level: warning
|
||||
165
examples/online_serving/chart-helm/templates/_helpers.tpl
Normal file
165
examples/online_serving/chart-helm/templates/_helpers.tpl
Normal file
@@ -0,0 +1,165 @@
|
||||
{{/*
Port the main container listens on.
Renders .Values.containerPort, falling back to "8000" when that value is empty.
*/}}
{{- define "chart.container-port" -}}
{{- .Values.containerPort | default "8000" }}
{{- end }}
|
||||
|
||||
{{/*
Service name.
Renders .Values.serviceName lower-cased and trimmed when it is set; otherwise
renders the double-quoted default "<release name>-service".
*/}}
{{- define "chart.service-name" -}}
{{- with .Values.serviceName }}
{{- . | lower | trim }}
{{- else }}
"{{ .Release.Name }}-service"
{{- end }}
{{- end }}
|
||||
|
||||
{{/*
Port exposed by the Service.
Renders .Values.servicePort, or the container port (see "chart.container-port")
when no service port is configured.
*/}}
{{- define "chart.service-port" -}}
{{- .Values.servicePort | default (include "chart.container-port" .) }}
{{- end }}
|
||||
|
||||
{{/*
Name given to the Service port (constant, rendered with double quotes).
*/}}
{{- define "chart.service-port-name" -}}
{{- quote "service-port" }}
{{- end }}
|
||||
|
||||
{{/*
Name given to the container port (constant, rendered with double quotes).
The Service uses this name as its targetPort.
*/}}
{{- define "chart.container-port-name" -}}
{{- quote "container-port" }}
{{- end }}
|
||||
|
||||
{{/*
Deployment strategy.
Renders .Values.deploymentStrategy under a "strategy:" key when it is set;
otherwise renders a rolling update with maxSurge 100% and maxUnavailable 0.
*/}}
{{- define "chart.strategy" -}}
strategy:
{{- with .Values.deploymentStrategy }}
{{- toYaml . | nindent 2 }}
{{- else }}
  rollingUpdate:
    maxSurge: 100%
    maxUnavailable: 0
{{- end }}
{{- end }}
|
||||
|
||||
{{/*
Additional container ports.
Renders .Values.extraPorts as YAML (preceded by a newline); renders nothing
when the list is empty.
*/}}
{{- define "chart.extraPorts" }}
{{- if .Values.extraPorts }}
{{ .Values.extraPorts | toYaml }}
{{- end }}
{{- end }}
|
||||
|
||||
{{/*
External ConfigMaps and Secrets.
Renders .Values.externalConfigs as YAML; renders nothing when the list is empty.
The Deployment appends this output to the container's envFrom list.
*/}}
{{- define "chart.externalConfigs" -}}
{{- if .Values.externalConfigs -}}
{{ .Values.externalConfigs | toYaml }}
{{- end }}
{{- end }}
|
||||
|
||||
|
||||
{{/*
Liveness and readiness probes.
Renders a "readinessProbe:" and/or "livenessProbe:" key, each followed by the
matching values block indented by two spaces. A probe whose value is empty is
omitted entirely.
*/}}
{{- define "chart.probes" -}}
{{- with .Values.readinessProbe }}
readinessProbe:
{{- toYaml . | nindent 2 }}
{{- end }}
{{- with .Values.livenessProbe }}
livenessProbe:
{{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}
|
||||
|
||||
{{/*
Container resource requests and limits.
memory and cpu are mandatory for both requests and limits (rendering fails with
an explicit message otherwise). The "nvidia.com/gpu" entries are rendered only
when both the requested and the limit GPU counts are greater than zero.
*/}}
{{- define "chart.resources" -}}
{{- $requests := .Values.resources.requests -}}
{{- $limits := .Values.resources.limits -}}
{{- $gpuKey := "nvidia.com/gpu" -}}
{{- $withGpu := and (gt (int (index $requests $gpuKey)) 0) (gt (int (index $limits $gpuKey)) 0) -}}
requests:
  memory: {{ required "Value 'resources.requests.memory' must be defined !" $requests.memory | quote }}
  cpu: {{ required "Value 'resources.requests.cpu' must be defined !" $requests.cpu | quote }}
  {{- if $withGpu }}
  nvidia.com/gpu: {{ required "Value 'resources.requests.nvidia.com/gpu' must be defined !" (index $requests $gpuKey) | quote }}
  {{- end }}
limits:
  memory: {{ required "Value 'resources.limits.memory' must be defined !" $limits.memory | quote }}
  cpu: {{ required "Value 'resources.limits.cpu' must be defined !" $limits.cpu | quote }}
  {{- if $withGpu }}
  nvidia.com/gpu: {{ required "Value 'resources.limits.nvidia.com/gpu' must be defined !" (index $limits $gpuKey) | quote }}
  {{- end }}
{{- end }}
|
||||
|
||||
|
||||
{{/*
User for the main container.
Renders "runAsUser: <uid>" (preceded by a newline) from .Values.image.runAsUser,
and nothing when that value is empty.

The guard and the rendered value now read the same key. Previously the block
was gated on .Values.image.runAsUser but rendered .Values.runAsUser, which
produced an empty "runAsUser:" when only the image-scoped key was set.
*/}}
{{- define "chart.user" }}
{{- with .Values.image.runAsUser }}
runAsUser: {{ . }}
{{- end }}
{{- end }}
|
||||
|
||||
|
||||
{{/*
Default environment for the model-download containers.
The S3 endpoint, bucket and credentials are read from the "<release>-secrets"
Secret. S3_PATH is added when .Values.extraInit.s3modelpath is set, and
AWS_EC2_METADATA_DISABLED whenever the awsEc2MetadataDisabled key is present
(hasKey is used so that an explicit false is still rendered).
*/}}
{{- define "chart.extraInitEnv" -}}
{{- $secretName := printf "%s-secrets" .Release.Name -}}
- name: S3_ENDPOINT_URL
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3endpoint
- name: S3_BUCKET_NAME
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3bucketname
- name: AWS_ACCESS_KEY_ID
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3accesskeyid
- name: AWS_SECRET_ACCESS_KEY
  valueFrom:
    secretKeyRef:
      name: {{ $secretName }}
      key: s3accesskey
{{- with .Values.extraInit.s3modelpath }}
- name: S3_PATH
  value: "{{ . }}"
{{- end }}
{{- if hasKey .Values.extraInit "awsEc2MetadataDisabled" }}
- name: AWS_EC2_METADATA_DISABLED
  value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}"
{{- end }}
{{- end }}
|
||||
|
||||
{{/*
Chart labels.
Renders .Values.labels as YAML; renders nothing when no labels are configured.
Used for the Deployment's metadata labels and for the Service selector.
*/}}
{{- define "chart.labels" -}}
{{- if .Values.labels -}}
{{ .Values.labels | toYaml }}
{{- end }}
{{- end }}
|
||||
11
examples/online_serving/chart-helm/templates/configmap.yaml
Normal file
11
examples/online_serving/chart-helm/templates/configmap.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
{{- /*
ConfigMap "<release>-configs" holding every key of .Values.configs.
The whole manifest is skipped when .Values.configs is empty.
*/ -}}
{{- with .Values.configs -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ $.Release.Name }}-configs"
  namespace: {{ $.Release.Namespace }}
data:
  {{- toYaml . | nindent 2 }}
{{- end -}}
|
||||
@@ -0,0 +1,6 @@
|
||||
{{- /*
Renders every entry of .Values.customObjects as its own YAML document.
Each entry is serialised with toYaml and then passed through tpl, so template
expressions inside the values are evaluated against the root context.

The document separator is emitted on its own line *before* each object.
Previously it was emitted after the object and, because of whitespace trimming,
ended up glued to the first line of the next object ("---apiVersion: ...").
*/ -}}
{{- range .Values.customObjects }}
---
{{ tpl (toYaml .) $ }}
{{- end }}
|
||||
131
examples/online_serving/chart-helm/templates/deployment.yaml
Normal file
131
examples/online_serving/chart-helm/templates/deployment.yaml
Normal file
@@ -0,0 +1,131 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: "{{ .Release.Name }}-deployment-vllm"
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "chart.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
{{- include "chart.strategy" . | nindent 2 }}
|
||||
{{- /*
Selector and pod labels come from "chart.labels" (.Values.labels) instead of
hard-coded "test" values, so the pods always carry the labels that the
Service selector (also "chart.labels") matches on.
*/}}
selector:
  matchLabels:
    {{- include "chart.labels" . | nindent 6 }}
progressDeadlineSeconds: 1200
template:
  metadata:
    labels:
      {{- include "chart.labels" . | nindent 8 }}
|
||||
spec:
|
||||
containers:
|
||||
- name: "vllm"
|
||||
image: "{{ required "Required value 'image.repository' must be defined !" .Values.image.repository }}:{{ required "Required value 'image.tag' must be defined !" .Values.image.tag }}"
|
||||
{{- if .Values.image.command }}
|
||||
command :
|
||||
{{- with .Values.image.command }}
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
securityContext:
|
||||
{{- if .Values.image.securityContext }}
|
||||
{{- with .Values.image.securityContext }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
runAsNonRoot: false
|
||||
{{- include "chart.user" . | indent 12 }}
|
||||
{{- end }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
{{- if .Values.image.env }}
|
||||
env :
|
||||
{{- with .Values.image.env }}
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
env: []
|
||||
{{- end }}
|
||||
{{- if or .Values.externalConfigs .Values.configs .Values.secrets }}
|
||||
envFrom:
|
||||
{{- if .Values.configs }}
|
||||
- configMapRef:
|
||||
name: "{{ .Release.Name }}-configs"
|
||||
{{- end }}
|
||||
{{- if .Values.secrets}}
|
||||
- secretRef:
|
||||
name: "{{ .Release.Name }}-secrets"
|
||||
{{- end }}
|
||||
{{- include "chart.externalConfigs" . | nindent 12 }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- name: {{ include "chart.container-port-name" . }}
|
||||
containerPort: {{ include "chart.container-port" . }}
|
||||
{{- include "chart.extraPorts" . | nindent 12 }}
|
||||
{{- include "chart.probes" . | indent 10 }}
|
||||
resources: {{- include "chart.resources" . | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: {{ .Release.Name }}-storage
|
||||
mountPath: /data
|
||||
|
||||
{{- with .Values.extraContainers }}
|
||||
{{ toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
|
||||
{{- if and .Values.extraInit (or .Values.extraInit.modelDownload.enabled .Values.extraInit.initContainers) }}
|
||||
initContainers:
|
||||
{{- if .Values.extraInit.modelDownload.enabled }}
|
||||
- name: wait-download-model
|
||||
image: {{ .Values.extraInit.modelDownload.image.repository }}:{{ .Values.extraInit.modelDownload.image.tag }}
|
||||
imagePullPolicy: {{ .Values.extraInit.modelDownload.image.pullPolicy }}
|
||||
command: {{ .Values.extraInit.modelDownload.waitContainer.command | toJson }}
|
||||
args:
|
||||
{{- toYaml .Values.extraInit.modelDownload.waitContainer.args | nindent 10 }}
|
||||
env:
|
||||
{{- if .Values.extraInit.modelDownload.waitContainer.env }}
|
||||
{{- toYaml .Values.extraInit.modelDownload.waitContainer.env | nindent 10 }}
|
||||
{{- else }}
|
||||
{{- include "chart.extraInitEnv" . | nindent 10 }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 2Gi
|
||||
volumeMounts:
|
||||
- name: {{ .Release.Name }}-storage
|
||||
mountPath: /data
|
||||
{{- end }}
|
||||
{{- with .Values.extraInit.initContainers }}
|
||||
{{- toYaml . | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: {{ .Release.Name }}-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .Release.Name }}-storage-claim
|
||||
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }}
|
||||
runtimeClassName: nvidia
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: nvidia.com/gpu.product
|
||||
operator: In
|
||||
{{- with .Values.gpuModels }}
|
||||
values:
|
||||
{{- toYaml . | nindent 20 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
31
examples/online_serving/chart-helm/templates/hpa.yaml
Normal file
31
examples/online_serving/chart-helm/templates/hpa.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
{{- /*
HorizontalPodAutoscaler for the vLLM Deployment, rendered only when
.Values.autoscaling.enabled is true. CPU and memory utilisation metrics are
each added only when the corresponding target percentage is configured.

scaleTargetRef.name must equal the Deployment's metadata.name
("<release>-deployment-vllm"); the former literal "vllm" matched no Deployment.
*/ -}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: "{{ .Release.Name }}-hpa"
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: "{{ .Release.Name }}-deployment-vllm"
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
  {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
  {{- end }}
  {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
  {{- end }}
{{- end }}
|
||||
41
examples/online_serving/chart-helm/templates/job.yaml
Normal file
41
examples/online_serving/chart-helm/templates/job.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
{{- /*
Job "<release>-init-vllm" that runs the model download into the shared
"<release>-storage" volume (mounted at /data). Rendered only when
.Values.extraInit.modelDownload.enabled is true. The container environment is
downloadJob.env when provided, otherwise the "chart.extraInitEnv" defaults.
*/ -}}
{{- if and .Values.extraInit .Values.extraInit.modelDownload.enabled }}
{{- $download := .Values.extraInit.modelDownload }}
{{- $job := $download.downloadJob }}
{{- $volume := printf "%s-storage" .Release.Name }}
apiVersion: batch/v1
kind: Job
metadata:
  name: "{{ .Release.Name }}-init-vllm"
  namespace: {{ .Release.Namespace }}
spec:
  ttlSecondsAfterFinished: 100
  template:
    metadata:
      name: init-vllm
    spec:
      containers:
      - name: job-download-model
        image: {{ $download.image.repository }}:{{ $download.image.tag }}
        imagePullPolicy: {{ $download.image.pullPolicy }}
        command: {{ toJson $job.command }}
        args:
        {{- toYaml $job.args | nindent 8 }}
        env:
        {{- with $job.env }}
        {{- toYaml . | nindent 8 }}
        {{- else }}
        {{- include "chart.extraInitEnv" $ | nindent 8 }}
        {{- end }}
        volumeMounts:
        - name: {{ $volume }}
          mountPath: /data
        resources:
          requests:
            cpu: 200m
            memory: 1Gi
          limits:
            cpu: 500m
            memory: 2Gi
      restartPolicy: OnFailure
      volumes:
      - name: {{ $volume }}
        persistentVolumeClaim:
          claimName: "{{ .Release.Name }}-storage-claim"
{{- end }}
|
||||
@@ -0,0 +1,7 @@
|
||||
{{- /*
PodDisruptionBudget for the vLLM pods. maxUnavailable defaults to 1 when
.Values.maxUnavailablePodDisruptionBudget is empty.

A selector is required for the budget to apply to anything: in policy/v1 a
missing selector matches no pods. It uses "chart.labels", the same label set
the Service selects on.
*/ -}}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: "{{ .Release.Name }}-pdb"
  namespace: {{ .Release.Namespace }}
spec:
  maxUnavailable: {{ default 1 .Values.maxUnavailablePodDisruptionBudget }}
  selector:
    matchLabels:
      {{- include "chart.labels" . | nindent 6 }}
|
||||
13
examples/online_serving/chart-helm/templates/pvc.yaml
Normal file
13
examples/online_serving/chart-helm/templates/pvc.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
{{- /*
PersistentVolumeClaim "<release>-storage-claim" (ReadWriteOnce), sized by
.Values.extraInit.pvcStorage. Rendered only when .Values.extraInit is set.
*/ -}}
{{- with .Values.extraInit }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ $.Release.Name }}-storage-claim"
  namespace: {{ $.Release.Namespace }}
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .pvcStorage }}
{{- end }}
|
||||
10
examples/online_serving/chart-helm/templates/secrets.yaml
Normal file
10
examples/online_serving/chart-helm/templates/secrets.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
{{- /*
Opaque Secret "<release>-secrets". Every key/value pair of .Values.secrets is
stored with its value base64-encoded.
*/ -}}
apiVersion: v1
kind: Secret
metadata:
  name: "{{ .Release.Name }}-secrets"
  namespace: {{ .Release.Namespace }}
type: Opaque
data:
  {{- range $name, $plain := .Values.secrets }}
  {{ $name }}: {{ b64enc $plain | quote }}
  {{- end }}
|
||||
14
examples/online_serving/chart-helm/templates/service.yaml
Normal file
14
examples/online_serving/chart-helm/templates/service.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
{{- /*
ClusterIP Service in front of the vLLM pods. It exposes "chart.service-port"
and forwards to the container port by name; pods are selected with
"chart.labels".

The name now comes from the "chart.service-name" helper, so .Values.serviceName
is honoured; with no serviceName set it is still "<release>-service". The
helper output is trimmed and re-quoted because its default branch is emitted on
a new line and already wrapped in double quotes.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "chart.service-name" . | trim | trimAll "\"" | quote }}
  namespace: {{ .Release.Namespace }}
spec:
  type: ClusterIP
  ports:
  - name: {{ include "chart.service-port-name" . }}
    port: {{ include "chart.service-port" . }}
    targetPort: {{ include "chart.container-port-name" . }}
    protocol: TCP
  selector:
  {{- include "chart.labels" . | nindent 4 }}
|
||||
135
examples/online_serving/chart-helm/tests/deployment_test.yaml
Normal file
135
examples/online_serving/chart-helm/tests/deployment_test.yaml
Normal file
@@ -0,0 +1,135 @@
|
||||
suite: test deployment
|
||||
templates:
|
||||
- deployment.yaml
|
||||
tests:
|
||||
- it: should create wait-download-model init container when modelDownload is enabled
|
||||
set:
|
||||
extraInit:
|
||||
modelDownload:
|
||||
enabled: true
|
||||
image:
|
||||
repository: "amazon/aws-cli"
|
||||
tag: "2.6.4"
|
||||
pullPolicy: "IfNotPresent"
|
||||
waitContainer:
|
||||
command: [ "/bin/bash" ]
|
||||
args:
|
||||
- "-eucx"
|
||||
- "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done"
|
||||
downloadJob:
|
||||
command: [ "/bin/bash" ]
|
||||
args:
|
||||
- "-eucx"
|
||||
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||
initContainers: [ ]
|
||||
pvcStorage: "1Gi"
|
||||
s3modelpath: "relative_s3_model_path/opt-125m"
|
||||
awsEc2MetadataDisabled: true
|
||||
asserts:
|
||||
- hasDocuments:
|
||||
count: 1
|
||||
- isKind:
|
||||
of: Deployment
|
||||
- isNotEmpty:
|
||||
path: spec.template.spec.initContainers
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].name
|
||||
value: wait-download-model
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].image
|
||||
value: amazon/aws-cli:2.6.4
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].imagePullPolicy
|
||||
value: IfNotPresent
|
||||
|
||||
- it: should only create custom init containers when modelDownload is disabled
|
||||
set:
|
||||
extraInit:
|
||||
modelDownload:
|
||||
enabled: false
|
||||
image:
|
||||
repository: "amazon/aws-cli"
|
||||
tag: "2.6.4"
|
||||
pullPolicy: "IfNotPresent"
|
||||
waitContainer:
|
||||
command: [ "/bin/bash" ]
|
||||
args: [ "-c", "echo test" ]
|
||||
downloadJob:
|
||||
command: [ "/bin/bash" ]
|
||||
args: [ "-c", "echo test" ]
|
||||
initContainers:
|
||||
- name: llm-d-routing-proxy
|
||||
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: proxy
|
||||
pvcStorage: "10Gi"
|
||||
asserts:
|
||||
- hasDocuments:
|
||||
count: 1
|
||||
- isKind:
|
||||
of: Deployment
|
||||
- lengthEqual:
|
||||
path: spec.template.spec.initContainers
|
||||
count: 1
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].name
|
||||
value: llm-d-routing-proxy
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].image
|
||||
value: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].ports[0].containerPort
|
||||
value: 8080
|
||||
|
||||
- it: should create both wait-download-model and custom init containers when both are enabled
|
||||
set:
|
||||
extraInit:
|
||||
modelDownload:
|
||||
enabled: true
|
||||
image:
|
||||
repository: "amazon/aws-cli"
|
||||
tag: "2.6.4"
|
||||
pullPolicy: "IfNotPresent"
|
||||
waitContainer:
|
||||
command: [ "/bin/bash" ]
|
||||
args:
|
||||
- "-eucx"
|
||||
- "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done"
|
||||
downloadJob:
|
||||
command: [ "/bin/bash" ]
|
||||
args:
|
||||
- "-eucx"
|
||||
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||
initContainers:
|
||||
- name: llm-d-routing-proxy
|
||||
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: proxy
|
||||
pvcStorage: "10Gi"
|
||||
asserts:
|
||||
- hasDocuments:
|
||||
count: 1
|
||||
- isKind:
|
||||
of: Deployment
|
||||
- lengthEqual:
|
||||
path: spec.template.spec.initContainers
|
||||
count: 2
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].name
|
||||
value: wait-download-model
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[0].image
|
||||
value: amazon/aws-cli:2.6.4
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[1].name
|
||||
value: llm-d-routing-proxy
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[1].image
|
||||
value: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||
- equal:
|
||||
path: spec.template.spec.initContainers[1].ports[0].containerPort
|
||||
value: 8080
|
||||
61
examples/online_serving/chart-helm/tests/job_test.yaml
Normal file
61
examples/online_serving/chart-helm/tests/job_test.yaml
Normal file
@@ -0,0 +1,61 @@
|
||||
suite: test job
|
||||
templates:
|
||||
- job.yaml
|
||||
tests:
|
||||
- it: should create job when modelDownload is enabled
|
||||
set:
|
||||
extraInit:
|
||||
modelDownload:
|
||||
enabled: true
|
||||
image:
|
||||
repository: "amazon/aws-cli"
|
||||
tag: "2.6.4"
|
||||
pullPolicy: "IfNotPresent"
|
||||
waitContainer:
|
||||
command: [ "/bin/bash" ]
|
||||
args: [ "-c", "wait" ]
|
||||
downloadJob:
|
||||
command: [ "/bin/bash" ]
|
||||
args:
|
||||
- "-eucx"
|
||||
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||
pvcStorage: "1Gi"
|
||||
s3modelpath: "relative_s3_model_path/opt-125m"
|
||||
awsEc2MetadataDisabled: true
|
||||
asserts:
|
||||
- hasDocuments:
|
||||
count: 1
|
||||
- isKind:
|
||||
of: Job
|
||||
- equal:
|
||||
path: spec.template.spec.containers[0].name
|
||||
value: job-download-model
|
||||
- equal:
|
||||
path: spec.template.spec.containers[0].image
|
||||
value: amazon/aws-cli:2.6.4
|
||||
- equal:
|
||||
path: spec.template.spec.restartPolicy
|
||||
value: OnFailure
|
||||
|
||||
- it: should not create job when modelDownload is disabled
|
||||
set:
|
||||
extraInit:
|
||||
modelDownload:
|
||||
enabled: false
|
||||
image:
|
||||
repository: "amazon/aws-cli"
|
||||
tag: "2.6.4"
|
||||
pullPolicy: "IfNotPresent"
|
||||
waitContainer:
|
||||
command: [ "/bin/bash" ]
|
||||
args: [ "-c", "wait" ]
|
||||
downloadJob:
|
||||
command: [ "/bin/bash" ]
|
||||
args: [ "-c", "download" ]
|
||||
initContainers:
|
||||
- name: llm-d-routing-proxy
|
||||
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||
pvcStorage: "10Gi"
|
||||
asserts:
|
||||
- hasDocuments:
|
||||
count: 0
|
||||
32
examples/online_serving/chart-helm/tests/pvc_test.yaml
Normal file
32
examples/online_serving/chart-helm/tests/pvc_test.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
suite: test pvc
|
||||
templates:
|
||||
- pvc.yaml
|
||||
tests:
|
||||
# Test Case: PVC Created When extraInit Defined
|
||||
- it: should create pvc when extraInit is defined
|
||||
set:
|
||||
extraInit:
|
||||
modelDownload:
|
||||
enabled: true
|
||||
image:
|
||||
repository: "amazon/aws-cli"
|
||||
tag: "2.6.4"
|
||||
pullPolicy: "IfNotPresent"
|
||||
waitContainer:
|
||||
command: ["/bin/bash"]
|
||||
args: ["-c", "wait"]
|
||||
downloadJob:
|
||||
command: ["/bin/bash"]
|
||||
args: ["-c", "download"]
|
||||
pvcStorage: "10Gi"
|
||||
asserts:
|
||||
- hasDocuments:
|
||||
count: 1
|
||||
- isKind:
|
||||
of: PersistentVolumeClaim
|
||||
- equal:
|
||||
path: spec.accessModes[0]
|
||||
value: ReadWriteOnce
|
||||
- equal:
|
||||
path: spec.resources.requests.storage
|
||||
value: 10Gi
|
||||
329
examples/online_serving/chart-helm/values.schema.json
Normal file
329
examples/online_serving/chart-helm/values.schema.json
Normal file
@@ -0,0 +1,329 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"image": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"repository": {
|
||||
"type": "string"
|
||||
},
|
||||
"tag": {
|
||||
"type": "string"
|
||||
},
|
||||
"command": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command",
|
||||
"repository",
|
||||
"tag"
|
||||
]
|
||||
},
|
||||
"containerPort": {
|
||||
"type": "integer"
|
||||
},
|
||||
"serviceName": {
  "type": ["string", "null"]
},
|
||||
"servicePort": {
|
||||
"type": "integer"
|
||||
},
|
||||
"extraPorts": {
|
||||
"type": "array"
|
||||
},
|
||||
"replicaCount": {
|
||||
"type": "integer"
|
||||
},
|
||||
"deploymentStrategy": {
|
||||
"type": "object"
|
||||
},
|
||||
"resources": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"requests": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cpu": {
|
||||
"type": "integer"
|
||||
},
|
||||
"memory": {
|
||||
"type": "string"
|
||||
},
|
||||
"nvidia.com/gpu": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"cpu",
|
||||
"memory",
|
||||
"nvidia.com/gpu"
|
||||
]
|
||||
},
|
||||
"limits": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cpu": {
|
||||
"type": "integer"
|
||||
},
|
||||
"memory": {
|
||||
"type": "string"
|
||||
},
|
||||
"nvidia.com/gpu": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"cpu",
|
||||
"memory",
|
||||
"nvidia.com/gpu"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"limits",
|
||||
"requests"
|
||||
]
|
||||
},
|
||||
"gpuModels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"autoscaling": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"minReplicas": {
|
||||
"type": "integer"
|
||||
},
|
||||
"maxReplicas": {
|
||||
"type": "integer"
|
||||
},
|
||||
"targetCPUUtilizationPercentage": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"enabled",
|
||||
"maxReplicas",
|
||||
"minReplicas",
|
||||
"targetCPUUtilizationPercentage"
|
||||
]
|
||||
},
|
||||
"configs": {
|
||||
"type": "object"
|
||||
},
|
||||
"secrets": {
|
||||
"type": "object"
|
||||
},
|
||||
"externalConfigs": {
|
||||
"type": "array"
|
||||
},
|
||||
"customObjects": {
|
||||
"type": "array"
|
||||
},
|
||||
"maxUnavailablePodDisruptionBudget": {
|
||||
"type": "string"
|
||||
},
|
||||
"extraInit": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"modelDownload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"image": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"repository": {
|
||||
"type": "string"
|
||||
},
|
||||
"tag": {
|
||||
"type": "string"
|
||||
},
|
||||
"pullPolicy": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["repository", "tag", "pullPolicy"]
|
||||
},
|
||||
"waitContainer": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"args": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"env": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"}
|
||||
}
|
||||
},
|
||||
"required": ["command", "args"]
|
||||
},
|
||||
"downloadJob": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"args": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"env": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"}
|
||||
}
|
||||
},
|
||||
"required": ["command", "args"]
|
||||
}
|
||||
},
|
||||
"required": ["enabled", "image", "waitContainer", "downloadJob"]
|
||||
},
|
||||
"initContainers": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"}
|
||||
},
|
||||
"s3modelpath": {
|
||||
"type": "string"
|
||||
},
|
||||
"pvcStorage": {
|
||||
"type": "string"
|
||||
},
|
||||
"awsEc2MetadataDisabled": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"modelDownload",
|
||||
"initContainers",
|
||||
"pvcStorage"
|
||||
]
|
||||
},
|
||||
"extraContainers": {
|
||||
"type": "array"
|
||||
},
|
||||
"readinessProbe": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"initialDelaySeconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"periodSeconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failureThreshold": {
|
||||
"type": "integer"
|
||||
},
|
||||
"httpGet": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string"
|
||||
},
|
||||
"port": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"path",
|
||||
"port"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"failureThreshold",
|
||||
"httpGet",
|
||||
"initialDelaySeconds",
|
||||
"periodSeconds"
|
||||
]
|
||||
},
|
||||
"livenessProbe": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"initialDelaySeconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failureThreshold": {
|
||||
"type": "integer"
|
||||
},
|
||||
"periodSeconds": {
|
||||
"type": "integer"
|
||||
},
|
||||
"httpGet": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string"
|
||||
},
|
||||
"port": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"path",
|
||||
"port"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"failureThreshold",
|
||||
"httpGet",
|
||||
"initialDelaySeconds",
|
||||
"periodSeconds"
|
||||
]
|
||||
},
|
||||
"labels": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"environment": {
|
||||
"type": "string"
|
||||
},
|
||||
"release": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"environment",
|
||||
"release"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"autoscaling",
|
||||
"configs",
|
||||
"containerPort",
|
||||
"customObjects",
|
||||
"deploymentStrategy",
|
||||
"externalConfigs",
|
||||
"extraContainers",
|
||||
"extraInit",
|
||||
"extraPorts",
|
||||
"gpuModels",
|
||||
"image",
|
||||
"labels",
|
||||
"livenessProbe",
|
||||
"maxUnavailablePodDisruptionBudget",
|
||||
"readinessProbe",
|
||||
"replicaCount",
|
||||
"resources",
|
||||
"secrets",
|
||||
"servicePort"
|
||||
]
|
||||
}
|
||||
174
examples/online_serving/chart-helm/values.yaml
Normal file
174
examples/online_serving/chart-helm/values.yaml
Normal file
@@ -0,0 +1,174 @@
|
||||
# -- Default values for chart vllm
|
||||
# -- Declare variables to be passed into your templates.
|
||||
|
||||
# -- Image configuration
|
||||
image:
|
||||
# -- Image repository
|
||||
repository: "vllm/vllm-openai"
|
||||
# -- Image tag
|
||||
tag: "latest"
|
||||
# -- Container launch command
|
||||
command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--enforce-eager", "--dtype", "bfloat16", "--block-size", "16", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
# -- Container port
|
||||
containerPort: 8000
|
||||
# -- Service name
|
||||
serviceName: null  # unset by default
|
||||
# -- Service port
|
||||
servicePort: 80
|
||||
# -- Additional ports configuration
|
||||
extraPorts: []
|
||||
|
||||
# -- Number of replicas
|
||||
replicaCount: 1
|
||||
|
||||
# -- Deployment strategy configuration
|
||||
deploymentStrategy: {}
|
||||
|
||||
# -- Resource configuration
|
||||
resources:
|
||||
requests:
|
||||
# -- Number of CPUs
|
||||
cpu: 4
|
||||
# -- CPU memory configuration
|
||||
memory: 16Gi
|
||||
# -- Number of GPUs used
|
||||
nvidia.com/gpu: 1
|
||||
limits:
|
||||
# -- Number of CPUs
|
||||
cpu: 4
|
||||
# -- CPU memory configuration
|
||||
memory: 16Gi
|
||||
# -- Number of GPUs used
|
||||
nvidia.com/gpu: 1
|
||||
|
||||
# -- Type of GPU used
|
||||
gpuModels:
|
||||
- "TYPE_GPU_USED"
|
||||
|
||||
# -- Autoscaling configuration
|
||||
autoscaling:
|
||||
# -- Enable autoscaling
|
||||
enabled: false
|
||||
# -- Minimum replicas
|
||||
minReplicas: 1
|
||||
# -- Maximum replicas
|
||||
maxReplicas: 100
|
||||
# -- Target CPU utilization for autoscaling
|
||||
targetCPUUtilizationPercentage: 80
|
||||
# targetMemoryUtilizationPercentage: 80
|
||||
|
||||
# -- ConfigMap configuration
|
||||
configs: {}
|
||||
|
||||
# -- Secrets configuration
|
||||
secrets: {}
|
||||
|
||||
# -- External configuration
|
||||
externalConfigs: []
|
||||
|
||||
# -- Custom Objects configuration
|
||||
customObjects: []
|
||||
|
||||
# -- Disruption Budget Configuration
|
||||
maxUnavailablePodDisruptionBudget: ""
|
||||
|
||||
# -- Additional configuration for the init container
|
||||
extraInit:
|
||||
# -- Model download functionality (optional)
|
||||
modelDownload:
|
||||
# -- Enable model download job and wait container
|
||||
enabled: true
|
||||
# -- Image configuration for model download operations
|
||||
image:
|
||||
# -- Image repository
|
||||
repository: "amazon/aws-cli"
|
||||
# -- Image tag
|
||||
tag: "2.6.4"
|
||||
# -- Image pull policy
|
||||
pullPolicy: "IfNotPresent"
|
||||
# -- Wait container configuration (init container that waits for model to be ready)
|
||||
waitContainer:
|
||||
# -- Command to execute
|
||||
command: ["/bin/bash"]
|
||||
# -- Arguments for the wait container
|
||||
args:
|
||||
- "-eucx"
|
||||
- "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done"
|
||||
# -- Environment variables (optional, overrides S3 defaults entirely if specified)
|
||||
# env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: "your-token"
|
||||
# - name: MODEL_ID
|
||||
# value: "meta-llama/Llama-2-7b"
|
||||
# -- Download job configuration (job that actually downloads the model)
|
||||
downloadJob:
|
||||
# -- Command to execute
|
||||
command: ["/bin/bash"]
|
||||
# -- Arguments for the download job
|
||||
args:
|
||||
- "-eucx"
|
||||
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||
# -- Environment variables (optional, overrides S3 defaults entirely if specified)
|
||||
# env:
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# value: "your-token"
|
||||
# - name: MODEL_ID
|
||||
# value: "meta-llama/Llama-2-7b"
|
||||
|
||||
# -- Custom init containers (appended after wait-download-model if modelDownload is enabled)
|
||||
initContainers: []
|
||||
# Example for llm-d sidecar:
|
||||
# initContainers:
|
||||
# - name: llm-d-routing-proxy
|
||||
# image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||
# imagePullPolicy: IfNotPresent
|
||||
# ports:
|
||||
# - containerPort: 8080
|
||||
# name: proxy
|
||||
# securityContext:
|
||||
# runAsUser: 1000
|
||||
|
||||
# -- Path of the model in the S3 bucket that hosts the model weights and config files
|
||||
s3modelpath: "relative_s3_model_path/opt-125m"
|
||||
# -- Storage size for the PVC
|
||||
pvcStorage: "1Gi"
|
||||
# -- Disable AWS EC2 metadata service
|
||||
awsEc2MetadataDisabled: true
|
||||
|
||||
# -- Additional containers configuration
|
||||
extraContainers: []
|
||||
|
||||
# -- Readiness probe configuration
|
||||
readinessProbe:
|
||||
# -- Number of seconds after the container has started before readiness probe is initiated
|
||||
initialDelaySeconds: 5
|
||||
# -- How often (in seconds) to perform the readiness probe
|
||||
periodSeconds: 5
|
||||
# -- Number of consecutive probe failures after which Kubernetes considers the overall check to have failed: the container is not ready
|
||||
failureThreshold: 3
|
||||
# -- Configuration of the kubelet HTTP request sent to the server
|
||||
httpGet:
|
||||
# -- Path to access on the HTTP server
|
||||
path: /health
|
||||
# -- Number of the port to access on the container, on which the server is listening (the chart's schema only accepts an integer)
|
||||
port: 8000
|
||||
|
||||
# -- Liveness probe configuration
|
||||
livenessProbe:
|
||||
# -- Number of seconds after the container has started before liveness probe is initiated
|
||||
initialDelaySeconds: 15
|
||||
# -- Number of consecutive probe failures after which Kubernetes considers the overall check to have failed: the container is not alive
|
||||
failureThreshold: 3
|
||||
# -- How often (in seconds) to perform the liveness probe
|
||||
periodSeconds: 10
|
||||
# -- Configuration of the kubelet HTTP request sent to the server
|
||||
httpGet:
|
||||
# -- Path to access on the HTTP server
|
||||
path: /health
|
||||
# -- Number of the port to access on the container, on which the server is listening (the chart's schema only accepts an integer)
|
||||
port: 8000
|
||||
|
||||
# -- Labels configuration (the chart's JSON schema requires both `environment` and `release`, as strings)
labels:
|
||||
environment: "test"
|
||||
release: "test"
|
||||
Reference in New Issue
Block a user