feat: quantize llm on cpu by ggml (#4781)

apecloud · Aug 23, 2023 · 86e4f88 · 86e4f88
1 parent c855898
commit 86e4f88
Show file tree

Hide file tree

Showing 6 changed files with 126 additions and 0 deletions.
diff --git a/deploy/ggml/Chart.yaml b/deploy/ggml/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: ggml
+description: A Helm chart for Kubernetes
+
+# Chart kind. 'application' charts bundle templates that are packaged into
+# versioned archives and deployed; 'library' charts only supply helpers to
+# other charts, define no templates of their own and cannot be deployed.
+type: application
+
+# Version of the chart itself (Semantic Versioning, https://semver.org/).
+# Bump it whenever the chart or its templates change, including appVersion.
+version: 0.1.0
+
+# Version of the application being deployed. It does not have to follow
+# Semantic Versioning and should stay quoted so it is always read as a string.
+# templates/clusterversion.yaml uses it as the image tag when image.tag is
+# empty.
+appVersion: '1.16.0'
diff --git a/deploy/ggml/templates/NOTES.txt b/deploy/ggml/templates/NOTES.txt
diff --git a/deploy/ggml/templates/_helpers.tpl b/deploy/ggml/templates/_helpers.tpl
@@ -0,0 +1,33 @@
+{{/*
+Chart name used in labels: .Values.nameOverride when it is set, otherwise
+.Chart.Name. The result is cut to 63 characters and a trailing "-" is removed.
+*/}}
+{{- define "llm.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Value of the helm.sh/chart label: "<chart name>-<chart version>" with every
+"+" replaced by "_", cut to 63 characters and a trailing "-" removed.
+*/}}
+{{- define "llm.chart" -}}
+{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
+{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Full label set for the chart's resources: the chart identifier, the selector
+labels, the app version (emitted only when the chart declares one) and the
+value of .Release.Service as the managing tool.
+*/}}
+{{- define "llm.labels" -}}
+helm.sh/chart: {{ include "llm.chart" . }}
+{{ include "llm.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart name (see "llm.name") and the release name.
+*/}}
+{{- define "llm.selectorLabels" -}}
+{{- $appName := include "llm.name" . -}}
+app.kubernetes.io/name: {{ $appName }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
diff --git a/deploy/ggml/templates/clusterdefinition.yaml b/deploy/ggml/templates/clusterdefinition.yaml
@@ -0,0 +1,36 @@
+# ClusterDefinition "ggml": one stateful component, also named "ggml", whose
+# single container listens on port 8000 and reads its data from /models.
+apiVersion: apps.kubeblocks.io/v1alpha1
+kind: ClusterDefinition
+metadata:
+  name: ggml
+  labels:
+    {{- include "llm.labels" . | nindent 4 }}
+spec:
+  componentDefs:
+    - name: ggml
+      workloadType: Stateful
+      characterType: ggml
+      service:
+        ports:
+          # targetPort refers to the container port named "model" below.
+          - name: model
+            port: 8000
+            targetPort: model
+      podSpec:
+        volumes:
+          # Shared scratch volume. The "download" init container declared in
+          # templates/clusterversion.yaml copies the model file into it.
+          - name: models
+            emptyDir: {}
+        containers:
+          - name: ggml
+            # Use image.pullPolicy from the chart values and fall back to
+            # IfNotPresent only when it is unset. Sprig's default function
+            # takes the fallback first and the value last, so the value is
+            # piped in.
+            imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" | quote }}
+            securityContext:
+              # UID 0 is root.
+              # NOTE(review): confirm the image really needs to run as root.
+              runAsUser: 0
+            terminationMessagePath: /dev/termination-log
+            terminationMessagePolicy: File
+            volumeMounts:
+              - name: models
+                mountPath: /models
+            ports:
+              - name: model
+                containerPort: 8000
+  # Connection credential: user "root" with an empty password.
+  connectionCredential:
+    username: root
+    password: ""
diff --git a/deploy/ggml/templates/clusterversion.yaml b/deploy/ggml/templates/clusterversion.yaml
@@ -0,0 +1,27 @@
+# ClusterVersion "ggml-baichuan-7b-q4" for the "ggml" ClusterDefinition: sets
+# the image of the "ggml" container and adds an init container that provides
+# the baichuan-llama-7b ggmlv3 q4_0 model file.
+apiVersion: apps.kubeblocks.io/v1alpha1
+kind: ClusterVersion
+metadata:
+  name: ggml-baichuan-7b-q4
+  labels:
+    {{- include "llm.labels" . | nindent 4 }}
+spec:
+  clusterDefinitionRef: ggml
+  componentVersions:
+    - componentDefRef: ggml
+      versionsContext:
+        initContainers:
+          # Copies the model file from the model image's /models directory
+          # into the "models" volume, which is mounted here at /models-target.
+          - name: download
+            image: apecloud/baichuan-llama-7b:ggmlv3.q4_0
+            command:
+              - sh
+              - "-c"
+              - cp /models/baichuan-llama-7b.ggmlv3.q4_0.bin /models-target/
+            volumeMounts:
+              - name: models
+                mountPath: /models-target
+        containers:
+          - name: ggml
+            # Rendered as <registry>/<repository>:<tag>. The registry falls
+            # back to docker.io and the tag to the chart appVersion when the
+            # corresponding value is empty.
+            image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}
+            env:
+              # Path of the copied model file as seen by this container.
+              - name: MODEL
+                value: /models/baichuan-llama-7b.ggmlv3.q4_0.bin
+            volumeMounts:
+              - name: models
+                mountPath: /models
diff --git a/deploy/ggml/values.yaml b/deploy/ggml/values.yaml
@@ -0,0 +1,6 @@
+# Image of the "ggml" container. templates/clusterversion.yaml renders it as
+# <registry>/<repository>:<tag>.
+image:
+  registry: docker.io
+  repository: apecloud/llama-cpp-python
+  # Overrides the image tag; when empty, the chart appVersion is used instead.
+  tag: latest
+  # Referenced by imagePullPolicy in templates/clusterdefinition.yaml.
+  pullPolicy: IfNotPresent