Skip to content

Commit d0c13f6

Browse files
Merge pull request #257 from richardr1126/update-helm-charts
Fix Helm charts health check, ingress, and values
2 parents fe99bb7 + 3c8e1b9 commit d0c13f6

File tree

8 files changed

+178
-132
lines changed

8 files changed

+178
-132
lines changed

charts/kokoro-fastapi/Chart.yaml

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
11
apiVersion: v2
22
name: kokoro-fastapi
3-
description: A Helm chart for kokoro-fastapi
4-
5-
# A chart can be either an 'application' or a 'library' chart.
6-
#
7-
# Application charts are a collection of templates that can be packaged into versioned archives
8-
# to be deployed.
9-
#
10-
# Library charts provide useful utilities or functions for the chart developer. They're included as
11-
# a dependency of application charts to inject those utilities and functions into the rendering
12-
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
3+
description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
134
type: application
5+
version: 0.2.0
6+
appVersion: "0.2.0"
147

15-
# This is the chart version. This version number should be incremented each time you make changes
16-
# to the chart and its templates, including the app version.
17-
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.1.0
19-
20-
# This is the version number of the application being deployed. This version number should be
21-
# incremented each time you make changes to the application. Versions are not expected to
22-
# follow Semantic Versioning. They should reflect the version the application is using.
23-
# It is recommended to use it with quotes.
24-
appVersion: "1.16.0"
8+
keywords:
9+
- tts
10+
- fastapi
11+
- gpu
12+
- kokoro
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Tested on
2+
# - Azure AKS with GPU node pool with Nvidia GPU operator
3+
# - This setup uses 1 ingress and load balances across all replicas (see kokoroTTS.replicaCount below), enabling simultaneous requests
4+
#
5+
# Azure CLI command to create a GPU node pool:
6+
# az aks nodepool add \
7+
# --resource-group $AZ_RESOURCE_GROUP \
8+
# --cluster-name $CLUSTER_NAME \
9+
# --name t4gpus \
10+
# --node-vm-size Standard_NC4as_T4_v3 \
11+
# --node-count 2 \
12+
# --enable-cluster-autoscaler \
13+
# --min-count 1 \
14+
# --max-count 2 \
15+
# --priority Spot \
16+
# --eviction-policy Delete \
17+
# --spot-max-price -1 \
18+
# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
19+
# --skip-gpu-driver-install
20+
21+
kokoroTTS:
22+
replicaCount: 8
23+
port: 8880
24+
tag: v0.2.0
25+
pullPolicy: IfNotPresent
26+
27+
# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator
28+
tolerations:
29+
- key: "kubernetes.azure.com/scalesetpriority"
30+
operator: Equal
31+
value: "spot"
32+
effect: NoSchedule
33+
- key: "sku"
34+
operator: Equal
35+
value: "gpu"
36+
effect: NoSchedule
37+
38+
ingress:
39+
enabled: true
40+
className: "nginx"
41+
annotations:
42+
# Requires cert-manager and external-dns to be in the cluster for TLS and DNS
43+
cert-manager.io/cluster-issuer: letsencrypt-prod
44+
external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
45+
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
46+
hosts:
47+
- host: your-external-dns-enabled-hostname
48+
paths:
49+
- path: /
50+
pathType: Prefix
51+
tls:
52+
- secretName: kokoro-fastapi-tls
53+
hosts:
54+
- your-external-dns-enabled-hostname
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Follow the official NVIDIA GPU Operator documentation
2+
# to install the GPU operator with these settings:
3+
# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
4+
#
5+
# This example is for a Nvidia T4 16gb GPU node pool with only 1 GPU on each node on Azure AKS.
6+
# It uses time-slicing to share the GPU and advertise each physical GPU to the cluster as 4 GPUs.
7+
# So each pod gets a share of the GPU (roughly 4 GB of the 16 GB; note that time-slicing does not enforce memory isolation between pods).
8+
#
9+
devicePlugin: # Remove this if you don't want to use time-slicing
10+
config:
11+
create: true
12+
name: "time-slicing-config"
13+
default: "any"
14+
data:
15+
any: |-
16+
version: v1
17+
flags:
18+
migStrategy: none
19+
sharing:
20+
timeSlicing:
21+
resources:
22+
- name: nvidia.com/gpu
23+
replicas: 4
24+
25+
daemonsets:
26+
tolerations:
27+
- key: "sku"
28+
operator: Equal
29+
value: "gpu"
30+
effect: NoSchedule
31+
- key: "kubernetes.azure.com/scalesetpriority"
32+
operator: Equal
33+
value: "spot"
34+
effect: NoSchedule
35+
36+
node-feature-discovery:
37+
master:
38+
tolerations:
39+
- key: "sku"
40+
operator: Equal
41+
value: "gpu"
42+
effect: NoSchedule
43+
- key: "kubernetes.azure.com/scalesetpriority"
44+
operator: Equal
45+
value: "spot"
46+
effect: NoSchedule
47+
worker:
48+
tolerations:
49+
- key: "sku"
50+
operator: Equal
51+
value: "gpu"
52+
effect: NoSchedule
53+
- key: "kubernetes.azure.com/scalesetpriority"
54+
operator: Equal
55+
value: "spot"
56+
effect: NoSchedule

charts/kokoro-fastapi/templates/NOTES.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
1414
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
1515
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16-
echo http://$SERVICE_IP:{{ .Values.service.port }}
16+
echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
1717
{{- else if contains "ClusterIP" .Values.service.type }}
1818
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
1919
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20-
echo "Visit http://127.0.0.1:8080 to use your application"
21-
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
20+
echo "Visit http://127.0.0.1:8880 to use your application"
21+
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
2222
{{- end }}
Lines changed: 25 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,43 @@
11
{{- if .Values.ingress.enabled -}}
2-
{{- $fullName := include "kokoro-fastapi.fullname" . -}}
3-
{{- $svcPort := .Values.service.port -}}
4-
{{- $rewriteTargets := (list) -}}
5-
{{- with .Values.ingress.host }}
6-
{{- range .endpoints }}
7-
{{- $serviceName := default $fullName .serviceName -}}
8-
{{- $rewrite := .rewrite | default "none" -}}
9-
{{- if not (has $rewrite $rewriteTargets ) -}}
10-
{{- $rewriteTargets = append $rewriteTargets $rewrite -}}
11-
{{- end -}}
12-
{{- end}}
13-
{{- end }}
14-
{{- range $key := $rewriteTargets }}
15-
{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
162
apiVersion: networking.k8s.io/v1
173
kind: Ingress
184
metadata:
19-
{{- if eq $key "none" }}
20-
name: {{ $fullName }}
21-
{{- else }}
22-
name: {{ $fullName }}-{{ $expandedRewrite }}
23-
{{- end }}
5+
name: {{ include "kokoro-fastapi.fullname" . }}
246
labels:
25-
{{- include "kokoro-fastapi.labels" $ | nindent 4 }}
26-
{{- if ne $key "none" }}
7+
{{- include "kokoro-fastapi.labels" . | nindent 4 }}
8+
{{- with .Values.ingress.annotations }}
279
annotations:
28-
nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
29-
{{- end }}
10+
{{- toYaml . | nindent 4 }}
11+
{{- end }}
3012
spec:
31-
{{- if $.Values.ingress.tls }}
13+
{{- with .Values.ingress.className }}
14+
ingressClassName: {{ . }}
15+
{{- end }}
16+
{{- if .Values.ingress.tls }}
3217
tls:
33-
{{- range $.Values.ingress.tls }}
18+
{{- range .Values.ingress.tls }}
3419
- hosts:
35-
{{- range .hosts }}
20+
{{- range .hosts }}
3621
- {{ . | quote }}
37-
{{- end }}
22+
{{- end }}
3823
secretName: {{ .secretName }}
24+
{{- end }}
3925
{{- end }}
40-
{{- end }}
4126
rules:
42-
{{- with $.Values.ingress.host }}
43-
- host: {{ .name | quote }}
27+
{{- range .Values.ingress.hosts }}
28+
- host: {{ .host | quote }}
4429
http:
4530
paths:
46-
{{- range .endpoints }}
47-
{{- $serviceName := default $fullName .serviceName -}}
48-
{{- $servicePort := default (print "http") .servicePort -}}
49-
{{- if eq ( .rewrite | default "none" ) $key }}
50-
{{- range .paths }}
51-
{{- if not (contains "@" .) }}
52-
{{- if eq $key "none" }}
53-
- path: {{ . }}
54-
{{- else }}
55-
- path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
56-
{{- end }}
57-
pathType: Prefix
58-
backend:
59-
service:
60-
name: "{{ $fullName }}-{{ $serviceName }}"
61-
port:
62-
number: {{ $servicePort }}
63-
{{- else }}
64-
{{- $path := . -}}
65-
{{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
66-
{{- range $count, $e := until ($replicaCount|int) }}
67-
- path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
68-
pathType: Prefix
69-
backend:
70-
service:
71-
name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
72-
port:
73-
number: {{ $servicePort }}
74-
{{- end }}
75-
{{- end }}
31+
{{- range .paths }}
32+
- path: {{ .path }}
33+
{{- with .pathType }}
34+
pathType: {{ . }}
7635
{{- end }}
36+
backend:
37+
service:
38+
name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
39+
port:
40+
number: {{ $.Values.kokoroTTS.port }}
7741
{{- end }}
78-
{{- end }}
79-
{{- end }}
80-
---
81-
{{- end }}
42+
{{- end }}
8243
{{- end }}

charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ spec:
2020
labels:
2121
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
2222
spec:
23-
{{- with .Values.images.imagePullSecrets }}
23+
{{- with .Values.kokoroTTS.imagePullSecrets }}
2424
imagePullSecrets:
2525
{{- toYaml . | nindent 8 }}
2626
{{- end }}
@@ -49,10 +49,16 @@ spec:
4949
httpGet:
5050
path: /health
5151
port: kokoro-tts-http
52+
initialDelaySeconds: 30
53+
periodSeconds: 30
54+
timeoutSeconds: 5
5255
readinessProbe:
5356
httpGet:
5457
path: /health
5558
port: kokoro-tts-http
59+
initialDelaySeconds: 30
60+
periodSeconds: 30
61+
timeoutSeconds: 5
5662
resources:
5763
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
5864
volumeMounts: []

charts/kokoro-fastapi/templates/tests/test-connection.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ spec:
1111
- name: wget
1212
image: busybox
1313
command: ['wget']
14-
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
14+
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
1515
restartPolicy: Never

charts/kokoro-fastapi/values.yaml

Lines changed: 24 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
# Default values for kokoro-fastapi.
22
# This is a YAML-formatted file.
33
# Declare variables to be passed into your templates.
4-
5-
replicaCount: 1
6-
7-
images:
8-
pullPolicy: "Always"
9-
imagePullSecrets: [ ]
4+
kokoroTTS:
5+
replicaCount: 1
6+
# Container image repository to pull the Kokoro TTS image from
7+
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
8+
imagePullSecrets: [] # Set if using a private image or getting rate limited
9+
tag: "latest"
10+
pullPolicy: Always
11+
port: 8880
12+
resources:
13+
limits:
14+
nvidia.com/gpu: 1
15+
requests:
16+
nvidia.com/gpu: 1
1017

1118
nameOverride: ""
1219
fullnameOverride: ""
@@ -38,47 +45,21 @@ service:
3845

3946
ingress:
4047
enabled: false
41-
className: ""
48+
className: "nginx"
4249
annotations: {}
43-
# kubernetes.io/ingress.class: nginx
44-
# kubernetes.io/tls-acme: "true"
45-
host:
46-
name: kokoro.example.com
47-
endpoints:
48-
- paths:
49-
- "/"
50-
serviceName: "fastapi"
51-
servicePort: 8880
50+
# cert-manager.io/cluster-issuer: letsencrypt-prod
51+
# external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
52+
# external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
53+
hosts:
54+
- host: kokoro.example.com
55+
paths:
56+
- path: /
57+
pathType: Prefix
5258

5359
tls: []
54-
# - secretName: chart-example-tls
60+
# - secretName: kokoro-fastapi-tls
5561
# hosts:
56-
# - chart-example.local
57-
58-
kokoroTTS:
59-
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
60-
tag: "latest"
61-
pullPolicy: Always
62-
serviceName: "fastapi"
63-
port: 8880
64-
replicaCount: 1
65-
resources:
66-
limits:
67-
nvidia.com/gpu: 1
68-
requests:
69-
nvidia.com/gpu: 1
70-
71-
72-
# We usually recommend not to specify default resources and to leave this as a conscious
73-
# choice for the user. This also increases chances charts run on environments with little
74-
# resources, such as Minikube. If you do want to specify resources, uncomment the following
75-
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
76-
# limits:
77-
# cpu: 100m
78-
# memory: 128Mi
79-
# requests:
80-
# cpu: 100m
81-
# memory: 128Mi
62+
# - kokoro.example.com
8263

8364
autoscaling:
8465
enabled: false

0 commit comments

Comments
 (0)