opendatahub-io · sallyom · Nov 5, 2024 · tumido · Nov 7, 2024
diff --git a/kubernetes_yaml/chatbot/README.md b/kubernetes_yaml/chatbot/README.md
@@ -0,0 +1,36 @@
+## Simple chatbot
+
+This folder holds the resource definitions to launch a chatbot.
+Before deploying, update the values in [configmap.yaml](./configmap.yaml) and [secret-token.yaml](./secret-token.yaml).
+Specifically, the `model_endpoint` value must be provided.
+Optionally, `model_name` and `api_key` can also be provided.
+
+Update the deployment as necessary, then
+run this from the root of the repository:
+
+
+```bash
+oc apply --kustomize ./kubernetes_yaml/chatbot
+```
+
+### Chatbot
+
+The chatbot image is built from
+[ai-lab-recipes repository chatbot](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/Containerfile)
+with the below system prompt line from
+[chatbot_ui.py](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/chatbot_ui.py)
+commented out, since it's not compatible with vLLM:
+
+```python
+prompt = ChatPromptTemplate.from_messages([
+    #("system", "You are world class technical advisor."),
+    MessagesPlaceholder(variable_name="history"),
+    ("user", "{input}")
+])
+```
+
+
+## Candidate model inference service
+
+This folder also contains an example InferenceService definition. Modify [candidate-server.yaml](./candidate-server.yaml) as needed to launch a model
+from `S3` with `vLLM`.
diff --git a/kubernetes_yaml/chatbot/candidate-server.yaml b/kubernetes_yaml/chatbot/candidate-server.yaml
@@ -0,0 +1,43 @@
+# Example KServe InferenceService that serves a candidate model with vLLM.
+# It is not listed in kustomization.yaml, so apply it separately after
+# updating the storage settings below.
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  annotations:
+    openshift.io/display-name: candidate
+    serving.knative.openshift.io/enablePassthrough: "true"
+    sidecar.istio.io/inject: "true"
+    sidecar.istio.io/rewriteAppHTTPProbers: "true"
+  # No `finalizers` list is set here: finalizers are normally added by the
+  # controllers that own them, and re-applying a manifest that hard-codes the
+  # list can overwrite entries added on the cluster.
+  labels:
+    opendatahub.io/dashboard: "true"
+  name: candidatemodel
+spec:
+  predictor:
+    # Fixed at a single replica (min == max).
+    maxReplicas: 1
+    minReplicas: 1
+    model:
+      modelFormat:
+        name: vLLM
+      name: ""
+      resources:
+        limits:
+          cpu: "2"
+          memory: 8Gi
+          nvidia.com/gpu: "1"
+        requests:
+          cpu: "1"
+          memory: 4Gi
+          nvidia.com/gpu: "1"
+      # NOTE(review): presumably the name of a ServingRuntime that must already
+      # exist in the namespace; it is not defined in this manifest - confirm.
+      runtime: candidatemodel
+      storage:
+        # Update to match the project's S3 storage
+        key: storage-s3
+        # Update the path to match the candidate model's location
+        path: xxxxxxxx/xxxxxxxxx/pvc-to-model-op/model/phase_2/model/hf_format/candidate_model/
diff --git a/kubernetes_yaml/chatbot/configmap.yaml b/kubernetes_yaml/chatbot/configmap.yaml
@@ -0,0 +1,11 @@
+# Non-secret chatbot settings; deployment.yaml maps each key below to an
+# environment variable of the chatbot container.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: chatbot-config
+data:
+  # Injected as MODEL_ENDPOINT; the README requires a real value here.
+  model_endpoint: "UPDATE"
+  # Injected as MODEL_NAME; the README lists it as optional.
+  model_name: "UPDATE"
diff --git a/kubernetes_yaml/chatbot/deployment.yaml b/kubernetes_yaml/chatbot/deployment.yaml
@@ -0,0 +1,57 @@
+# Runs the chatbot UI as a single-replica Deployment. The model connection
+# settings come from the chatbot-config ConfigMap and the model-token Secret.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: chatbot
+  labels:
+    app: chatbot
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatbot
+  template:
+    metadata:
+      labels:
+        # Must match spec.selector above and the selector in service.yaml.
+        app: chatbot
+    spec:
+      serviceAccountName: chatbot-sa
+      containers:
+        - name: chatbot-inference
+          image: quay.io/sallyom/chatbot:vllm
+          env:
+            # The README documents model_name and api_key as optional, so their
+            # references are marked optional: the pod can still start when a
+            # key is absent instead of failing at container creation.
+            - name: MODEL_NAME
+              valueFrom:
+                configMapKeyRef:
+                  name: chatbot-config
+                  key: model_name
+                  optional: true
+            # Mandatory: the README states model_endpoint must be provided.
+            - name: MODEL_ENDPOINT
+              valueFrom:
+                configMapKeyRef:
+                  name: chatbot-config
+                  key: model_endpoint
+            - name: MODEL_ENDPOINT_BEARER
+              valueFrom:
+                secretKeyRef:
+                  name: model-token
+                  key: api_key
+                  optional: true
+          ports:
+            # Same port that service.yaml and route.yaml target.
+            - containerPort: 8501
+          securityContext:
+            runAsNonRoot: true
+            # Hardening in line with the Kubernetes "restricted" Pod Security
+            # Standard: no privilege escalation, all Linux capabilities dropped.
+            # NOTE(review): assumes the image needs no capabilities - confirm.
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
diff --git a/kubernetes_yaml/chatbot/kustomization.yaml b/kubernetes_yaml/chatbot/kustomization.yaml
@@ -0,0 +1,10 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:  # candidate-server.yaml is not listed, so this kustomization does not apply it
+  - deployment.yaml  # chatbot Deployment
+  - service.yaml  # Service selecting the chatbot pods
+  - route.yaml  # OpenShift Route pointing at the Service
+  - sa.yaml  # ServiceAccount referenced by the Deployment
+  - configmap.yaml  # model_name / model_endpoint settings
+  - secret-token.yaml  # api_key read by the Deployment as MODEL_ENDPOINT_BEARER
diff --git a/kubernetes_yaml/chatbot/route.yaml b/kubernetes_yaml/chatbot/route.yaml
@@ -0,0 +1,19 @@
+# Exposes chatbot-service outside the cluster through the OpenShift router.
+apiVersion: route.openshift.io/v1
+kind: Route
+metadata:
+  name: chatbot
+  labels:
+    app: chatbot
+spec:
+  to:
+    kind: Service
+    name: chatbot-service
+  port:
+    targetPort: 8501
+  tls:
+    # Edge termination: TLS ends at the router.
+    termination: edge
+    # Without a policy, plain-HTTP requests to an edge route are not served;
+    # redirect them to HTTPS instead.
+    insecureEdgeTerminationPolicy: Redirect
diff --git a/kubernetes_yaml/chatbot/sa.yaml b/kubernetes_yaml/chatbot/sa.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: chatbot-sa  # referenced by serviceAccountName in deployment.yaml
diff --git a/kubernetes_yaml/chatbot/secret-token.yaml b/kubernetes_yaml/chatbot/secret-token.yaml
@@ -0,0 +1,11 @@
+# API key for the model endpoint; deployment.yaml passes api_key to the
+# chatbot container as MODEL_ENDPOINT_BEARER.
+apiVersion: v1
+kind: Secret
+type: Opaque
+metadata:
+  name: model-token
+stringData:
+  # Placeholder; replace with the key for your endpoint. Never commit a real
+  # key to version control.
+  api_key: "xxx"
diff --git a/kubernetes_yaml/chatbot/service.yaml b/kubernetes_yaml/chatbot/service.yaml
@@ -0,0 +1,15 @@
+# Service in front of the chatbot pods; route.yaml refers to it by name.
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: chatbot
+  name: chatbot-service
+spec:
+  ports:
+    - port: 8501
+      targetPort: 8501
+      protocol: TCP
+  # Selects the pods created by deployment.yaml (label app=chatbot).
+  selector:
+    app: chatbot