-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtgi-inference.yaml
More file actions
44 lines (44 loc) · 1017 Bytes
/
tgi-inference.yaml
File metadata and controls
44 lines (44 loc) · 1017 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
---
# Kubernetes Pod running Hugging Face Text Generation Inference (TGI)
# v2.1.1, serving the HuggingFaceH4/zephyr-7b-beta model on one NVIDIA A10.
apiVersion: v1
kind: Pod
metadata:
  # NOTE: replace <username> with your own username before `kubectl apply`.
  name: tgi-<username>
spec:
  affinity:
    nodeAffinity:
      # Hard requirement: only schedule onto nodes exposing an NVIDIA A10 GPU.
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: nvidia.com/gpu.product
                operator: In
                values:
                  - NVIDIA-A10
  volumes:
    # Ephemeral working directory; HOME is pointed here so model weights
    # are downloaded to pod-local scratch space.
    - name: scratch
      emptyDir: {}
    # RAM-backed /dev/shm — TGI needs more shared memory than the
    # container runtime's small default.
    - name: shm
      emptyDir:
        medium: Memory
        sizeLimit: 4Gi
  containers:
    - name: mypod
      image: ghcr.io/huggingface/text-generation-inference:2.1.1
      # requests == limits → Guaranteed QoS; GPU resources must always
      # have requests equal to limits.
      resources:
        limits:
          memory: 16Gi
          cpu: 4
          nvidia.com/gpu: 1
        requests:
          memory: 16Gi
          cpu: 4
          nvidia.com/gpu: 1
      command: ["/bin/bash", "-c"]
      args:
        # Launch TGI from /scratch; `timeout 1h` kills the server after an
        # hour so the pod does not hold the GPU indefinitely.
        - >-
          cd /scratch;
          export HOME=/scratch;
          timeout 1h text-generation-launcher --model-id HuggingFaceH4/zephyr-7b-beta;
      volumeMounts:
        - name: scratch
          mountPath: /scratch
        - name: shm
          mountPath: /dev/shm