diff --git a/README.md b/README.md index 4a82ef118..a8278ae66 100644 --- a/README.md +++ b/README.md @@ -51,14 +51,18 @@ A task with the following resources: ``` resources: limits: - nvidia.com/gpu: 1 # requesting 1 vGPU - nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs + nvidia.com/gpumem: 3000 # identifies 3G GPU memory each physical GPU allocates to the pod ``` will see 3G device memory inside container ![img](./imgs/hard_limit.jpg) +> Note: +1. **After installing HAMi, the value of `nvidia.com/gpu` registered on the node defaults to the number of vGPUs.** +2. **When requesting resources in a pod, `nvidia.com/gpu` refers to the number of physical GPUs required by the current pod.** + ### Supported devices [![nvidia GPU](https://img.shields.io/badge/Nvidia-GPU-blue)](https://github.com/Project-HAMi/HAMi#preparing-your-gpu-nodes) diff --git a/README_cn.md b/README_cn.md index e4bd99fed..77b445fb0 100644 --- a/README_cn.md +++ b/README_cn.md @@ -55,6 +55,10 @@ HAMi支持设备资源的硬隔离 ![img](./imgs/hard_limit.jpg) +> 注意: +1. **安装HAMi后,节点上注册的 `nvidia.com/gpu` 值默认为vGPU数量** +2. 
**pod中申请资源时,`nvidia.com/gpu` 指当前pod需要的物理GPU数量** + ### 支持的设备 [![nvidia GPU](https://img.shields.io/badge/Nvidia-GPU-blue)](https://github.com/Project-HAMi/HAMi#preparing-your-gpu-nodes) diff --git a/docs/develop/tasklist.md b/docs/develop/tasklist.md index 873366f98..84ab91a4d 100644 --- a/docs/develop/tasklist.md +++ b/docs/develop/tasklist.md @@ -113,6 +113,6 @@ spec: command:["bash","-c","sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 VGPUs + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs ``` diff --git a/example.yaml b/example.yaml index a6afefd31..a1ad2ec83 100644 --- a/example.yaml +++ b/example.yaml @@ -34,8 +34,8 @@ spec: - while true; do /cuda-samples/vectorAdd; done resources: limits: - nvidia.com/gpu: 1 # requesting 1 vGPUs - nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory (Optional,Integer) + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs + nvidia.com/gpumem: 3000 # Each vGPU contains 3000M device memory (Optional,Integer) terminationMessagePath: /dev/termination-log terminationMessagePolicy: File imagePullPolicy: IfNotPresent diff --git a/examples/nvidia/default_use.yaml b/examples/nvidia/default_use.yaml index ab484967b..82ea73cee 100644 --- a/examples/nvidia/default_use.yaml +++ b/examples/nvidia/default_use.yaml @@ -9,6 +9,6 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs - nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory (Optional,Integer) - nvidia.com/gpucores: 30 # Each vGPU uses 30% of the entire GPU (Optional,Integer) + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs + nvidia.com/gpumem: 3000 # identifies 3000M GPU memory each physical GPU allocates to the pod (Optional,Integer) + nvidia.com/gpucores: 30 # identifies 30% GPU core each physical GPU allocates to the pod (Optional,Integer) diff --git a/examples/nvidia/default_use_legacy.yaml 
b/examples/nvidia/default_use_legacy.yaml index 0796a0101..8336e23e0 100644 --- a/examples/nvidia/default_use_legacy.yaml +++ b/examples/nvidia/default_use_legacy.yaml @@ -9,4 +9,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs diff --git a/examples/nvidia/example.yaml b/examples/nvidia/example.yaml index 0710269b4..c78df5078 100644 --- a/examples/nvidia/example.yaml +++ b/examples/nvidia/example.yaml @@ -9,11 +9,11 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs - #nvidia.com/gpumem: 3000 # Each vGPU containers 3000M device memory - nvidia.com/gpumem-percentage: 50 #Each vGPU containers 50% device memory of that GPU. Can not be used with nvidia.com/gpumem - #nvidia.com/gpucores: 90 # Utilization limit of this vGPU is set to 50% of total GPU utilization - #nvidia.com/priority: 0 # We only have two priority class, 0(high) and 1(low), default: 1 + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs + #nvidia.com/gpumem: 3000 # identifies 3000M GPU memory each physical GPU allocates to the pod + nvidia.com/gpumem-percentage: 50 # identifies 50% GPU memory each physical GPU allocates to the pod. Can not be used with nvidia.com/gpumem + #nvidia.com/gpucores: 90 # identifies 90% GPU core each physical GPU allocates to the pod + #nvidia.com/priority: 0 # we only have two priority class, 0(high) and 1(low), default: 1 #The utilization of high priority task won't be limited to resourceCores unless sharing GPU node with other high priority tasks. #The utilization of low priority task won't be limited to resourceCores if no other tasks sharing its GPU. 
- name: ubuntu-container0 @@ -24,7 +24,7 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs - nvidia.com/gpumem: 2000 - #nvidia.com/gpucores: 90 + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs + nvidia.com/gpumem: 2000 # identifies 2000M GPU memory each physical GPU allocates to the pod (Optional,Integer) + #nvidia.com/gpucores: 90 # identifies 90% GPU core each physical GPU allocates to the pod diff --git a/examples/nvidia/specify_card_type_not_use.yaml b/examples/nvidia/specify_card_type_not_use.yaml index beb4e63af..a3153f909 100644 --- a/examples/nvidia/specify_card_type_not_use.yaml +++ b/examples/nvidia/specify_card_type_not_use.yaml @@ -12,4 +12,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs diff --git a/examples/nvidia/specify_card_type_to_use.yaml b/examples/nvidia/specify_card_type_to_use.yaml index df45e6e92..ebc605e22 100644 --- a/examples/nvidia/specify_card_type_to_use.yaml +++ b/examples/nvidia/specify_card_type_to_use.yaml @@ -12,4 +12,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs diff --git a/examples/nvidia/specify_scheduling_policy.yaml b/examples/nvidia/specify_scheduling_policy.yaml index 31aed818d..608812d88 100644 --- a/examples/nvidia/specify_scheduling_policy.yaml +++ b/examples/nvidia/specify_scheduling_policy.yaml @@ -12,4 +12,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 1 # requesting 2 vGPUs + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs diff --git a/examples/nvidia/specify_uuid_not_use.yaml b/examples/nvidia/specify_uuid_not_use.yaml index 3255d7eb0..50cd19629 100644 --- a/examples/nvidia/specify_uuid_not_use.yaml +++ 
b/examples/nvidia/specify_uuid_not_use.yaml @@ -11,4 +11,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs \ No newline at end of file + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs \ No newline at end of file diff --git a/examples/nvidia/specify_uuid_to_use.yaml b/examples/nvidia/specify_uuid_to_use.yaml index a8090b587..eb1f87d92 100644 --- a/examples/nvidia/specify_uuid_to_use.yaml +++ b/examples/nvidia/specify_uuid_to_use.yaml @@ -11,4 +11,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 1 # requesting 1 vGPU + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs diff --git a/examples/nvidia/use_as_normal.yaml b/examples/nvidia/use_as_normal.yaml index 015a857bc..1fbf5e10d 100644 --- a/examples/nvidia/use_as_normal.yaml +++ b/examples/nvidia/use_as_normal.yaml @@ -1,3 +1,4 @@ +# Gpu-pod1 and gpu-pod2 will NOT share the same GPU apiVersion: v1 kind: Pod metadata: @@ -9,7 +10,7 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs --- apiVersion: v1 kind: Pod @@ -22,5 +23,4 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs -# gpu-pod1 and gpu-pod2 will NOT share the same GPU \ No newline at end of file + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs \ No newline at end of file diff --git a/examples/nvidia/use_exclusive_card.yaml b/examples/nvidia/use_exclusive_card.yaml index d3abf5866..79171eadb 100644 --- a/examples/nvidia/use_exclusive_card.yaml +++ b/examples/nvidia/use_exclusive_card.yaml @@ -9,6 +9,6 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs - nvidia.com/gpumem-percentage: 100 # Each vGPU contains 100% of the entire GPU device memory (Optional,Integer) - 
nvidia.com/gpucores: 100 # Each vGPU uses 100% of the entire GPU cores(Optional,Integer) + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs + nvidia.com/gpumem-percentage: 100 # identifies 100% GPU memory each physical GPU allocates to the pod (Optional,Integer) + nvidia.com/gpucores: 100 # identifies 100% GPU core each physical GPU allocates to the pod (Optional,Integer) diff --git a/examples/nvidia/use_memory_fraction.yaml b/examples/nvidia/use_memory_fraction.yaml index c507af772..18480a095 100644 --- a/examples/nvidia/use_memory_fraction.yaml +++ b/examples/nvidia/use_memory_fraction.yaml @@ -9,6 +9,6 @@ spec: command: ["bash", "-c", "sleep 86400"] resources: limits: - nvidia.com/gpu: 2 # requesting 2 vGPUs - nvidia.com/gpumem-percentage: 50 # Each vGPU contains 50% device memory of that GPU (Optional,Integer) - nvidia.com/gpucores: 30 # Each vGPU uses 30% of the entire GPU (Optional,Integer) + nvidia.com/gpu: 2 # declare how many physical GPUs the pod needs + nvidia.com/gpumem-percentage: 50 # identifies 50% GPU memory each physical GPU allocates to the pod (Optional,Integer) + nvidia.com/gpucores: 30 # identifies 30% GPU core each physical GPU allocates to the pod (Optional,Integer) diff --git a/examples/nvidia/use_sharing_card.yaml b/examples/nvidia/use_sharing_card.yaml new file mode 100644 index 000000000..df61f81e0 --- /dev/null +++ b/examples/nvidia/use_sharing_card.yaml @@ -0,0 +1,30 @@ +# Gpu-pod1 and gpu-pod2 could share the same GPU +apiVersion: v1 +kind: Pod +metadata: + name: gpu-pod1 +spec: + containers: + - name: ubuntu-container + image: ubuntu:18.04 + command: ["bash", "-c", "sleep 86400"] + resources: + limits: + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs + nvidia.com/gpumem-percentage: 40 # identifies 40% GPU memory each physical GPU allocates to the pod (Optional,Integer) + nvidia.com/gpucores: 60 # identifies 60% GPU core each physical GPU allocates to the pod (Optional,Integer) +--- 
+apiVersion: v1 +kind: Pod +metadata: + name: gpu-pod2 +spec: + containers: + - name: ubuntu-container + image: ubuntu:18.04 + command: ["bash", "-c", "sleep 86400"] + resources: + limits: + nvidia.com/gpu: 1 # declare how many physical GPUs the pod needs + nvidia.com/gpumem-percentage: 60 # identifies 60% GPU memory each physical GPU allocates to the pod (Optional,Integer) + nvidia.com/gpucores: 40 # identifies 40% GPU core each physical GPU allocates to the pod (Optional,Integer)