diff --git a/kubernetes_yaml/chatbot/README.md b/kubernetes_yaml/chatbot/README.md new file mode 100644 index 00000000..b31c8201 --- /dev/null +++ b/kubernetes_yaml/chatbot/README.md @@ -0,0 +1,38 @@ +## Simple chatbot + +This folder holds the resource definitions to launch a chatbot. +The environment variable `MODEL_ENDPOINT` must be provided. +Optionally, `MODEL_ENDPOINT_BEARER` and `MODEL_NAME` can be provided. + +Update the deployment as necessary and +run this from the root of the repository + + +```bash +oc apply --kustomize ./chatbot +``` + +TODO: +- get `MODEL_ENDPOINT` from configmap or secret. Currently you need to update it in deployment.yaml + +### Chatbot + +The chatbot image is built from +[ai-lab-recipes repository chatbot](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/Containerfile) +with the below system prompt line from +[chatbot_ui.py](https://github.com/containers/ai-lab-recipes/blob/main/recipes/natural_language_processing/chatbot/app/chatbot_ui.py) +commented out, since it's not compatible with vLLM: + +```python +prompt = ChatPromptTemplate.from_messages([ +    #("system", "You are world class technical advisor."), +    MessagesPlaceholder(variable_name="history"), +    ("user", "{input}") +]) +``` + + +## Candidate model inference service + +This folder also contains an example InferenceService definition. Modify [candidate-server.yaml](./candidate-server.yaml) as needed to launch a model +from `S3` with `vLLM`. 
diff --git a/kubernetes_yaml/chatbot/candidate-server.yaml b/kubernetes_yaml/chatbot/candidate-server.yaml new file mode 100644 index 00000000..c1e3b5ce --- /dev/null +++ b/kubernetes_yaml/chatbot/candidate-server.yaml @@ -0,0 +1,37 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + openshift.io/display-name: candidate + serving.knative.openshift.io/enablePassthrough: "true" + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + finalizers: + - inferenceservice.finalizers + generation: 2 + labels: + opendatahub.io/dashboard: "true" + name: candidatemodel + namespace: ilab +spec: + predictor: + maxReplicas: 1 + minReplicas: 1 + model: + modelFormat: + name: vLLM + name: "" + resources: + limits: + cpu: "2" + memory: 8Gi + nvidia.com/gpu: "1" + requests: + cpu: "1" + memory: 4Gi + nvidia.com/gpu: "1" + runtime: candidatemodel + storage: + key: aws-connection-ilab-s3 + # Update path to match candidate-server location + path: xxxxxxxx/xxxxxxxxx/pvc-to-model-op/model/phase_2/model/hf_format/candidate_model/ diff --git a/kubernetes_yaml/chatbot/chatbot-deployment.yaml b/kubernetes_yaml/chatbot/chatbot-deployment.yaml new file mode 100644 index 00000000..673927b4 --- /dev/null +++ b/kubernetes_yaml/chatbot/chatbot-deployment.yaml @@ -0,0 +1,36 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatbot + labels: + app: chatbot +spec: + replicas: 1 + selector: + matchLabels: + app: chatbot + template: + metadata: + labels: + app: chatbot + spec: + serviceAccountName: chatbot-sa + containers: + - name: chatbot-inference + image: quay.io/sallyom/chatbot:vllm + env: + - name: MODEL_NAME + # Update this value to the name of the model being served + value: mixtral + - name: MODEL_ENDPOINT + # Update this value to the endpoint of a running model server + value: https://mixtral-sallyom.apps.ocp-beta-test.nerc.mghpcc.org + - name: MODEL_ENDPOINT_BEARER + valueFrom: + secretKeyRef: + name: 
judge-server + key: api_key + ports: + - containerPort: 8501 + securityContext: + runAsNonRoot: true diff --git a/kubernetes_yaml/chatbot/kustomization.yaml b/kubernetes_yaml/chatbot/kustomization.yaml new file mode 100644 index 00000000..0767d233 --- /dev/null +++ b/kubernetes_yaml/chatbot/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - chatbot-deployment.yaml + - service.yaml + - route.yaml + - sa.yaml diff --git a/kubernetes_yaml/chatbot/route.yaml b/kubernetes_yaml/chatbot/route.yaml new file mode 100644 index 00000000..678b55db --- /dev/null +++ b/kubernetes_yaml/chatbot/route.yaml @@ -0,0 +1,14 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: chatbot + labels: + app: chatbot +spec: + to: + kind: Service + name: chatbot-service + port: + targetPort: 8501 + tls: + termination: edge diff --git a/kubernetes_yaml/chatbot/sa.yaml b/kubernetes_yaml/chatbot/sa.yaml new file mode 100644 index 00000000..23b19f60 --- /dev/null +++ b/kubernetes_yaml/chatbot/sa.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: chatbot-sa diff --git a/kubernetes_yaml/chatbot/service.yaml b/kubernetes_yaml/chatbot/service.yaml new file mode 100644 index 00000000..ba5d78c5 --- /dev/null +++ b/kubernetes_yaml/chatbot/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: chatbot-service + labels: + app: chatbot +spec: + selector: + app: chatbot + ports: + - protocol: TCP + port: 8501 + targetPort: 8501