# values.yaml (119 lines, 2.52 KB)
# Source: repository forked from feiskyer/ollama-kubernetes
############ Configuration for Ollama ############
# Container image used for the Ollama server.
image:
  repository: ollama/ollama
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""
# Number of Ollama replicas. When the persistent volume is enabled and
# replicaCount > 1, the storage class must support being read by multiple
# pods at the same time.
replicaCount: 1
# Model settings for Ollama.
llm:
  # List of models to be pre-loaded
  # (refer to https://ollama.com/library for the supported models).
  models: ["phi3"]
# Storage for the Ollama pods.
persistentVolume:
  # If true, use a Persistent Volume Claim; if false, use emptyDir.
  enabled: true
  storageClass: "default"
  # NOTE(review): ReadWriteOnce is the default here; per the replicaCount
  # comment, running more than one replica needs storage that supports
  # multiple readers - confirm before scaling out.
  accessModes:
    - ReadWriteOnce
  size: 30Gi
  # Set to a non-empty value to use an existing PVC.
  claimName: ""
# Image pull secrets; none by default.
imagePullSecrets: []
# Name overrides; empty strings leave the defaults in place
# (presumably the chart-derived names - templates are not visible here).
nameOverride: ""
fullnameOverride: ""
# Extra annotations and labels for the pods; none by default.
podAnnotations: {}
podLabels: {}
# Pod-level security context; empty by default.
# Uncomment and adapt the example below as needed.
podSecurityContext:
  {}
  # fsGroup: 2000
# Container-level security context; empty by default.
# Uncomment and adapt the example below as needed.
securityContext:
  {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
# Service exposing the Ollama server.
service:
  type: ClusterIP
  port: 11434
# Compute resources for the Ollama container. One NVIDIA GPU is configured
# by default; the commented amd.com/gpu lines are the AMD alternative.
# Keep the GPU request equal to the GPU limit (Kubernetes requires this for
# extended resources).
resources:
  limits:
    cpu: 4
    memory: 4Gi
    nvidia.com/gpu: "1"
    # amd.com/gpu: "1"
  requests:
    cpu: 100m
    memory: 128Mi
    nvidia.com/gpu: "1"
    # amd.com/gpu: "1"
# Health probes: HTTP GET on "/" against the port named "http"
# (presumably a named container port declared in the chart templates -
# not visible here).
livenessProbe:
  httpGet:
    path: /
    port: http
readinessProbe:
  httpGet:
    path: /
    port: http
# Autoscaling settings; disabled by default.
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80
# Scheduling constraints for the Ollama pods.
nodeSelector: {}
# Tolerates the kubernetes.azure.com/scalesetpriority taint regardless of
# its value (operator: Exists).
tolerations:
  - key: kubernetes.azure.com/scalesetpriority
    operator: Exists
affinity: {}
############ Configuration for Ollama WebUI ############
# NOTE(review): the nesting below is reconstructed. ingress and
# persistentVolume are placed under ui because a second top-level
# persistentVolume would duplicate the Ollama key of the same name -
# confirm against the chart templates.
ui:
  enabled: true
  replicaCount: 1
  # Supported values are "open-webui" and "lobe-chat".
  type: "lobe-chat"
  image:
    # Or use ghcr.io/open-webui/open-webui for "open-webui".
    repository: lobehub/lobe-chat
    pullPolicy: IfNotPresent
    # NOTE(review): "latest" combined with IfNotPresent means a node that has
    # already pulled the image will not pick up newer builds; consider
    # pinning a version.
    tag: "latest"
  service:
    type: ClusterIP
    port: 80
  nodeSelector: {}
  # Tolerations are a list (matching the Ollama tolerations value),
  # so the empty default is [] rather than {}.
  tolerations: []
  affinity: {}
  ingress:
    enabled: false
    className: ""
    annotations:
      {}
      # kubernetes.io/ingress.class: nginx
      # kubernetes.io/tls-acme: "true"
    hosts:
      - host: chart-example.local
        paths:
          - path: /
            pathType: ImplementationSpecific
    tls: []
    #  - secretName: chart-example-tls
    #    hosts:
    #      - chart-example.local
  persistentVolume:
    # If true, use a Persistent Volume Claim; if false, use emptyDir.
    enabled: true
    storageClass: "default"
    accessModes:
      - ReadWriteOnce
    size: 10Gi
    # Set to a non-empty value to use an existing PVC.
    claimName: ""