Skip to content

Commit

Permalink
feat: added baseline instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
cdxker committed Sep 11, 2024
1 parent 131f331 commit d55047e
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 1 deletion.
56 changes: 55 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,55 @@
# install-embeddings
# install-embeddings

Prerequisites:
1) eksctl
2) aws cli
3) helm cli
4) kubectl


1) Create eks cluster and install plugins

Edit the parameters at the top of `create_cluster.sh`. The most important
one to change is your AWS account ID.

Get your account id with `aws sts get-caller-identity`

```sh
account_id=555555555555
region=us-east-2
cluster_name=trieve-gpu
main_instance_type=t3.small
gpu_instance_type=g4dn.xlarge
gpu_count=1
```

Ensure your vCPU quota for "Running On-Demand G and VT instances" in your region of choice is at least `${gpu_count} * 4` (each g4dn.xlarge uses 4 vCPUs)

Run `./create_cluster.sh` to create the cluster

2) Specify your embedding models

Edit `embedding_models.yaml` to list the models that you want to use

3) Install the helm chart

```sh
helm upgrade -i embedding-release oci://registry-1.docker.io/trieve/embeddings-helm -f embedding_models.yaml
```

4) Get your model endpoints

```sh
kubectl get ing
```

![](./assets/ingress.png)


## Cleanup

```sh
helm uninstall embedding-release
./delete_cluster.sh
```

Binary file added assets/ingress.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
79 changes: 79 additions & 0 deletions create_cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/bin/bash
#
# Provisions an EKS cluster for GPU workloads:
#   1. creates the cluster with one CPU node group and one GPU node group,
#   2. applies the NVIDIA device plugin ConfigMap,
#   3. installs the AWS Load Balancer Controller together with the IAM
#      policy and service account it runs under,
#   4. installs the NVIDIA device plugin helm chart.
#
# Requires eksctl, aws, helm, kubectl and curl on PATH. Edit the parameters
# below (at minimum account_id) before running.

# Abort on the first failed command, unset variable or failed pipeline stage
# so later steps never run against a cluster that was not created.
set -euo pipefail

############
# Parameters
export K8S_VERSION="1.30"

account_id=555555555555
region=us-east-2
cluster_name=trieve-gpu
main_instance_type=t3.small
gpu_instance_type=g4dn.xlarge
gpu_count=1

# Directory holding this script, so the YAML files shipped next to it are
# found no matter where the script is invoked from.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

policy_name="${cluster_name}-load-balancer-controller-policy"
policy_arn="arn:aws:iam::${account_id}:policy/${policy_name}"

# Only create the cluster when it does not exist yet, so the script can be
# re-run after a partial failure without dying on "already exists".
if aws eks describe-cluster --region "${region}" --name "${cluster_name}" > /dev/null 2>&1; then
  echo "Cluster ${cluster_name} already exists, skipping creation"
else
  # metadata.version pins the control-plane version to K8S_VERSION.
  # The "eks-node: gpu" label is what nvdp.yaml's nodeSelector matches on.
  eksctl create cluster -f - << EOF
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: ${cluster_name}
  region: ${region}
  version: "${K8S_VERSION}"
nodeGroups:
  - name: main-basic
    instanceType: ${main_instance_type}
    desiredCapacity: 1
  - name: main-gpu
    labels:
      eks-node: gpu
    instanceType: ${gpu_instance_type}
    desiredCapacity: ${gpu_count}
EOF
fi

echo 'Deployment Done!'

aws eks update-kubeconfig --region "${region}" --name "${cluster_name}"

echo 'creating config map'
kubectl apply -f "${script_dir}/nvidia-device-plugin.yaml"

echo 'Deploying helm chart'

helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
helm repo add eks https://aws.github.io/eks-charts
helm repo update eks nvdp

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as the policy document.
curl \
  --fail \
  -s \
  -o iam-policy.json \
  https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/v2.7.1/docs/install/iam_policy.json

# The policy outlives the cluster, so it may already exist from an earlier run.
if aws iam get-policy --policy-arn "${policy_arn}" > /dev/null 2>&1; then
  echo "IAM policy ${policy_name} already exists, skipping creation"
else
  aws iam create-policy \
    --policy-name="${policy_name}" \
    --policy-document file://iam-policy.json
fi

eksctl utils associate-iam-oidc-provider --region="${region}" --cluster="${cluster_name}" --approve
eksctl create iamserviceaccount \
  --region="${region}" \
  --name="aws-load-balancer-controller" \
  --namespace="kube-system" \
  --cluster="${cluster_name}" \
  --role-name="${cluster_name}-aws-load-balancer-controller-role" \
  --attach-policy-arn="${policy_arn}" \
  --approve

# serviceAccount.create=false: reuse the service account created by eksctl
# above instead of letting the chart create its own.
helm upgrade --install \
  aws-load-balancer-controller \
  eks/aws-load-balancer-controller \
  --version="1.7.1" \
  --namespace="kube-system" \
  --set clusterName="${cluster_name}" \
  --set serviceAccount.create=false \
  --set serviceAccount.name=aws-load-balancer-controller

# config.name points the plugin at the ConfigMap applied earlier from
# nvidia-device-plugin.yaml.
helm upgrade -i nvdp nvdp/nvidia-device-plugin \
  --namespace kube-system \
  -f "${script_dir}/nvdp.yaml" \
  --version 0.14.0 \
  --set config.name=nvidia-device-plugin \
  --force
6 changes: 6 additions & 0 deletions delete_cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Tears down what create_cluster.sh provisioned: the two helm releases in
# kube-system, the EKS cluster itself, and the load balancer controller IAM
# policy.
#
# Requires eksctl, aws and helm on PATH. cluster_name and region must match
# the values used in create_cluster.sh.

cluster_name=trieve-gpu
region=us-east-2

# Uninstalls are best-effort: a release that is already gone (or an
# unreachable cluster) must not prevent the cluster from being deleted.
helm uninstall nvdp -n kube-system \
  || echo 'warning: could not uninstall nvdp, continuing' >&2
helm uninstall aws-load-balancer-controller -n kube-system \
  || echo 'warning: could not uninstall aws-load-balancer-controller, continuing' >&2

# --wait blocks until the cluster's resources are deleted, so the policy
# below should no longer be attached to the service account role.
eksctl delete cluster --region="${region}" --name="${cluster_name}" --wait || exit 1

# create_cluster.sh creates this policy in the caller's account; remove it so
# it is not left behind after cleanup.
account_id="$(aws sts get-caller-identity --query Account --output text)"
aws iam delete-policy \
  --policy-arn "arn:aws:iam::${account_id}:policy/${cluster_name}-load-balancer-controller-policy" \
  || echo 'warning: could not delete the load balancer controller IAM policy' >&2
8 changes: 8 additions & 0 deletions embedding_models.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# Helm values passed with -f when installing the embeddings-helm chart.
accessKey: ''

# Models to serve; edit this mapping for the models that you want to use.
models:
  jina-code:
    replicas: 4
    revision: main
    # The end of the URL https://huggingface.co/jinaai/jina-embeddings-v2-base-code
    modelName: jinaai/jina-embeddings-v2-base-code
    # If you have a private hugging face repo
    hfToken: ''
86 changes: 86 additions & 0 deletions nvdp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
---
# Helm values for the nvdp/nvidia-device-plugin chart that create_cluster.sh
# installs into kube-system.

# Plugin configuration source. create_cluster.sh overrides config.name on the
# command line with --set config.name=nvidia-device-plugin.
config:
  name: ''
  map: {}
  default: ''
  fallbackStrategies:
    - 'named'
    - 'single'

legacyDaemonsetAPI: null
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null

nameOverride: ''
fullnameOverride: ''
namespaceOverride: ''
selectorLabelsOverride: {}

allowDefaultNamespace: false

imagePullSecrets: []
image:
  repository: nvcr.io/nvidia/k8s-device-plugin
  pullPolicy: IfNotPresent
  tag: ''

updateStrategy:
  type: RollingUpdate

podAnnotations: {}
podSecurityContext: {}
securityContext: {}

resources: {}
# Matches the "eks-node: gpu" label that create_cluster.sh puts on the
# main-gpu node group.
nodeSelector:
  eks-node: gpu
affinity: {}
tolerations:
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule

priorityClassName: 'system-node-critical'

runtimeClassName: null

# Settings under the nfd key.
nfd:
  nameOverride: node-feature-discovery
  enableNodeFeatureApi: false
  master:
    serviceAccount:
      name: node-feature-discovery
      create: true
    config:
      extraLabelNs:
        - 'nvidia.com'

  worker:
    tolerations:
      - key: 'node-role.kubernetes.io/master'
        operator: 'Equal'
        value: ''
        effect: 'NoSchedule'
      - key: 'nvidia.com/gpu'
        operator: 'Equal'
        value: 'present'
        effect: 'NoSchedule'
    config:
      sources:
        pci:
          # Quoted so the class codes stay strings.
          deviceClassWhitelist:
            - '02'
            - '0200'
            - '0207'
            - '0300'
            - '0302'
          deviceLabelFields:
            - vendor

# Settings under the gfd key (disabled here).
gfd:
  enabled: false
  nameOverride: gpu-feature-discovery
  namespaceOverride: ''
15 changes: 15 additions & 0 deletions nvidia-device-plugin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# ConfigMap applied by create_cluster.sh before the NVIDIA device plugin
# helm chart is installed; the chart is pointed at it by name.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: kube-system
  name: nvidia-device-plugin
data:
  # A single entry named "any"; its value is a YAML document stored as a
  # literal string (|- strips the trailing newline).
  any: |-
    version: v1
    flags:
      migStrategy: none
    sharing:
      timeSlicing:
        resources:
          - name: nvidia.com/gpu
            replicas: 10

0 comments on commit d55047e

Please sign in to comment.