feat: added baseline instructions

devflowinc · Sep 11, 2024 · d55047e · d55047e
1 parent 131f331
commit d55047e
Show file tree

Hide file tree

Showing 7 changed files with 249 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1 +1,55 @@
-# install-embeddings
+# install-embeddings
+
+Prerequisites:
+1) eksctl
+2) aws cli
+3) helm cli
+4) kubectl
+
+
+1) Create eks cluster and install plugins
+
+Edit the parameters at the top of `create_cluster.sh`. The most important one
+to change is `account_id`.
+
+Get your account id with `aws sts get-caller-identity`
+
+```sh
+account_id=555555555555
+region=us-east-2
+cluster_name=trieve-gpu
+main_instance_type=t3.small
+gpu_instance_type=g4dn.xlarge
+gpu_count=1
+```
+
+Ensure your vCPU quota for "Running On-Demand G and VT instances" in your chosen region is at least `gpu_count * 4` (each `g4dn.xlarge` has 4 vCPUs)
+
+Run `./create_cluster.sh` to generate the cluster
+
+2) Specify your embedding models
+
+Modify embedding_models.yaml for the models that you want to use
+
+3) Install the helm chart
+
+```sh
+helm upgrade -i embedding-release oci://registry-1.docker.io/trieve/embeddings-helm -f embedding_models.yaml
+```
+
+4) Get your model endpoints
+
+```sh
+kubectl get ing
+```
+
+![](./assets/ingress.png)
+
+
+## Cleanup
+
+```sh
+helm uninstall embedding-release
+./delete_cluster.sh
+```
+
diff --git a/assets/ingress.png b/assets/ingress.png
diff --git a/create_cluster.sh b/create_cluster.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+############
+# Parameters
+export K8S_VERSION="1.30"
+
+account_id=555555555555
+region=us-east-2
+cluster_name=trieve-gpu
+main_instance_type=t3.small
+gpu_instance_type=g4dn.xlarge
+gpu_count=1
+
+eksctl create cluster -f - << EOF
+apiVersion: eksctl.io/v1alpha5
+kind: ClusterConfig
+
+metadata:
+  name: ${cluster_name}
+  region: ${region}
+
+nodeGroups:
+  - name: main-basic
+    instanceType: ${main_instance_type}
+    desiredCapacity: 1
+  - name: main-gpu
+    labels: 
+      eks-node: gpu
+    instanceType: ${gpu_instance_type}
+    desiredCapacity: ${gpu_count}
+EOF
+
+echo 'Deployment Done!'
+
+aws eks update-kubeconfig --region ${region} --name ${cluster_name}
+
+echo 'creating config map'
+kubectl apply -f ./nvidia-device-plugin.yaml
+
+echo 'Deploying helm chart'
+
+helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
+helm repo add eks https://aws.github.io/eks-charts
+helm repo update eks nvdp
+
+curl \
+  -s \
+  -o iam-policy.json \
+  https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/v2.7.1/docs/install/iam_policy.json
+
+aws iam create-policy \
+  --policy-name="${cluster_name}-load-balancer-controller-policy" \
+  --policy-document file://iam-policy.json
+
+eksctl utils associate-iam-oidc-provider --region=${region} --cluster=${cluster_name} --approve
+eksctl create iamserviceaccount \
+  --region="${region}" \
+  --name="aws-load-balancer-controller" \
+  --namespace="kube-system" \
+  --cluster="${cluster_name}" \
+  --role-name="${cluster_name}-aws-load-balancer-controller-role" \
+  --attach-policy-arn="arn:aws:iam::${account_id}:policy/${cluster_name}-load-balancer-controller-policy" \
+  --approve
+
+helm upgrade --install \
+  aws-load-balancer-controller \
+  eks/aws-load-balancer-controller \
+  --version="1.7.1" \
+  --namespace="kube-system" \
+  --set clusterName=${cluster_name} \
+  --set serviceAccount.create=false \
+  --set serviceAccount.name=aws-load-balancer-controller
+
+helm upgrade -i nvdp nvdp/nvidia-device-plugin \
+  --namespace kube-system \
+  -f ../k8s/base/nvdp.yaml \
+  --version 0.14.0 \
+  --set config.name=nvidia-device-plugin \
+  --force
diff --git a/delete_cluster.sh b/delete_cluster.sh
@@ -0,0 +1,6 @@
+cluster_name=trieve-gpu
+region=us-east-2
+
+helm uninstall nvdp -n kube-system
+helm uninstall aws-load-balancer-controller -n kube-system
+eksctl delete cluster --region=${region} --name=${cluster_name}
diff --git a/embedding_models.yaml b/embedding_models.yaml
@@ -0,0 +1,8 @@
+# Access key value; empty in this baseline.
+accessKey: ''
+
+# Models to deploy, one mapping entry per model.
+models:
+  jina-code:
+    # The end of the URL https://huggingface.co/jinaai/jina-embeddings-v2-base-code
+    modelName: jinaai/jina-embeddings-v2-base-code
+    revision: main
+    replicas: 4
+    # If you have a private hugging face repo
+    hfToken: ''
diff --git a/nvdp.yaml b/nvdp.yaml
@@ -0,0 +1,86 @@
+# Helm values for the NVIDIA device plugin chart.
+
+# ConfigMap-based plugin configuration. `name` is empty here;
+# create_cluster.sh supplies it with `--set config.name=nvidia-device-plugin`.
+config:
+  name: ''
+  map: {}
+  default: ''
+  fallbackStrategies:
+    - named
+    - single
+
+# Plugin options left unset (null).
+legacyDaemonsetAPI: null
+compatWithCPUManager: null
+migStrategy: null
+failOnInitError: null
+deviceListStrategy: null
+deviceIDStrategy: null
+nvidiaDriverRoot: null
+gdsEnabled: null
+mofedEnabled: null
+
+# Naming overrides (none set).
+nameOverride: ''
+fullnameOverride: ''
+namespaceOverride: ''
+selectorLabelsOverride: {}
+
+allowDefaultNamespace: false
+
+# Container image. `tag` is left empty.
+imagePullSecrets: []
+image:
+  repository: nvcr.io/nvidia/k8s-device-plugin
+  pullPolicy: IfNotPresent
+  tag: ''
+
+updateStrategy:
+  type: RollingUpdate
+
+podAnnotations: {}
+podSecurityContext: {}
+securityContext: {}
+
+resources: {}
+# Matches the `eks-node: gpu` label that create_cluster.sh puts on the
+# main-gpu node group.
+nodeSelector:
+  eks-node: gpu
+affinity: {}
+tolerations:
+  - key: CriticalAddonsOnly
+    operator: Exists
+  - key: nvidia.com/gpu
+    operator: Exists
+    effect: NoSchedule
+
+priorityClassName: system-node-critical
+
+runtimeClassName: null
+
+# Values under the `nfd` (node-feature-discovery) key.
+nfd:
+  nameOverride: node-feature-discovery
+  enableNodeFeatureApi: false
+  master:
+    serviceAccount:
+      name: node-feature-discovery
+      create: true
+    config:
+      extraLabelNs:
+        - nvidia.com
+
+  worker:
+    tolerations:
+      - key: node-role.kubernetes.io/master
+        operator: Equal
+        value: ''
+        effect: NoSchedule
+      - key: nvidia.com/gpu
+        operator: Equal
+        value: present
+        effect: NoSchedule
+    config:
+      sources:
+        pci:
+          # Quoted so the entries stay strings and keep their leading zeros.
+          deviceClassWhitelist:
+            - '02'
+            - '0200'
+            - '0207'
+            - '0300'
+            - '0302'
+          deviceLabelFields:
+            - vendor
+# Values under the `gfd` (gpu-feature-discovery) key; disabled here.
+gfd:
+  enabled: false
+  nameOverride: gpu-feature-discovery
+  namespaceOverride: ''
diff --git a/nvidia-device-plugin.yaml b/nvidia-device-plugin.yaml
@@ -0,0 +1,15 @@
+# ConfigMap applied by create_cluster.sh (`kubectl apply -f
+# ./nvidia-device-plugin.yaml`) and selected for the NVIDIA device plugin
+# chart through `--set config.name=nvidia-device-plugin`.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nvidia-device-plugin
+  namespace: kube-system
+data:
+  # Single config entry named "any". It sets migStrategy to none and
+  # configures time-slicing with replicas: 10 for nvidia.com/gpu -
+  # presumably so each physical GPU is advertised as 10 schedulable
+  # nvidia.com/gpu resources; confirm against the device plugin docs.
+  any: |-
+    version: v1
+    flags:
+      migStrategy: none
+    sharing:
+      timeSlicing:
+        resources:
+        - name: nvidia.com/gpu
+          replicas: 10