From ac27c3d068afc6cd613e33166986d8b8f2c36300 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=80=8D=E9=81=A5?=
Date: Tue, 3 Dec 2024 22:12:09 +0800
Subject: [PATCH] update examples: add sharing example and add comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 逍遥
---
 docs/develop/tasklist.md                      |  2 +-
 example.yaml                                  |  4 +--
 examples/nvidia/default_use.yaml              |  6 ++--
 examples/nvidia/default_use_legacy.yaml       |  2 +-
 examples/nvidia/example.yaml                  | 12 +++----
 .../nvidia/specify_card_type_not_use.yaml     |  2 +-
 examples/nvidia/specify_card_type_to_use.yaml |  2 +-
 .../nvidia/specify_scheduling_policy.yaml     |  2 +-
 examples/nvidia/specify_uuid_not_use.yaml     |  2 +-
 examples/nvidia/specify_uuid_to_use.yaml      |  2 +-
 examples/nvidia/use_as_normal.yaml            |  4 +--
 examples/nvidia/use_exclusive_card.yaml       |  6 ++--
 examples/nvidia/use_memory_fraction.yaml      |  6 ++--
 examples/nvidia/use_sharing_card.yaml         | 32 +++++++++++++++++++
 14 files changed, 58 insertions(+), 26 deletions(-)
 create mode 100644 examples/nvidia/use_sharing_card.yaml

diff --git a/docs/develop/tasklist.md b/docs/develop/tasklist.md
index 873366f98..bf56dec69 100644
--- a/docs/develop/tasklist.md
+++ b/docs/develop/tasklist.md
@@ -113,6 +113,6 @@ spec:
       command:["bash","-c","sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 VGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
 ```
 
diff --git a/example.yaml b/example.yaml
index a6afefd31..815540ee5 100644
--- a/example.yaml
+++ b/example.yaml
@@ -34,8 +34,8 @@ spec:
         - while true; do /cuda-samples/vectorAdd; done
         resources:
          limits:
-            nvidia.com/gpu: 1 # requesting 1 vGPUs
-            nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory (Optional,Integer)
+            nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+            nvidia.com/gpumem: 3000 # Each vGPU contains 3000M device memory (Optional,Integer)
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        imagePullPolicy: IfNotPresent
diff --git a/examples/nvidia/default_use.yaml b/examples/nvidia/default_use.yaml
index ab484967b..7dac2e9a4 100644
--- a/examples/nvidia/default_use.yaml
+++ b/examples/nvidia/default_use.yaml
@@ -9,6 +9,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory (Optional,Integer)
-          nvidia.com/gpucores: 30 # Each vGPU uses 30% of the entire GPU (Optional,Integer)
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem: 3000 # Identifies 3000M GPU memory each physical GPU allocates to the pod (Optional,Integer)
+          nvidia.com/gpucores: 30 # Identifies 30% GPU cores each physical GPU allocates to the pod (Optional,Integer)
diff --git a/examples/nvidia/default_use_legacy.yaml b/examples/nvidia/default_use_legacy.yaml
index 0796a0101..733e3c0a6 100644
--- a/examples/nvidia/default_use_legacy.yaml
+++ b/examples/nvidia/default_use_legacy.yaml
@@ -9,4 +9,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
diff --git a/examples/nvidia/example.yaml b/examples/nvidia/example.yaml
index 0710269b4..b2e730ed9 100644
--- a/examples/nvidia/example.yaml
+++ b/examples/nvidia/example.yaml
@@ -9,10 +9,10 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          #nvidia.com/gpumem: 3000 # Each vGPU containers 3000M device memory
-          nvidia.com/gpumem-percentage: 50 #Each vGPU containers 50% device memory of that GPU. Can not be used with nvidia.com/gpumem
-          #nvidia.com/gpucores: 90 # Utilization limit of this vGPU is set to 50% of total GPU utilization
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          #nvidia.com/gpumem: 3000 # Identifies 3000M GPU memory each physical GPU allocates to the pod
+          nvidia.com/gpumem-percentage: 50 # Identifies 50% GPU memory each physical GPU allocates to the pod. Cannot be used with nvidia.com/gpumem
+          #nvidia.com/gpucores: 90 # Identifies 90% GPU cores each physical GPU allocates to the pod
           #nvidia.com/priority: 0 # We only have two priority class, 0(high) and 1(low), default: 1
           #The utilization of high priority task won't be limited to resourceCores unless sharing GPU node with other high priority tasks.
           #The utilization of low priority task won't be limited to resourceCores if no other tasks sharing its GPU.
@@ -24,7 +24,7 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
           nvidia.com/gpumem: 2000
-          #nvidia.com/gpucores: 90
+          #nvidia.com/gpucores: 90 # Identifies 90% GPU cores each physical GPU allocates to the pod
 
diff --git a/examples/nvidia/specify_card_type_not_use.yaml b/examples/nvidia/specify_card_type_not_use.yaml
index beb4e63af..8a03ffc38 100644
--- a/examples/nvidia/specify_card_type_not_use.yaml
+++ b/examples/nvidia/specify_card_type_not_use.yaml
@@ -12,4 +12,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
diff --git a/examples/nvidia/specify_card_type_to_use.yaml b/examples/nvidia/specify_card_type_to_use.yaml
index df45e6e92..8784ede31 100644
--- a/examples/nvidia/specify_card_type_to_use.yaml
+++ b/examples/nvidia/specify_card_type_to_use.yaml
@@ -12,4 +12,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
diff --git a/examples/nvidia/specify_scheduling_policy.yaml b/examples/nvidia/specify_scheduling_policy.yaml
index 31aed818d..76c487ae6 100644
--- a/examples/nvidia/specify_scheduling_policy.yaml
+++ b/examples/nvidia/specify_scheduling_policy.yaml
@@ -12,4 +12,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 1 # requesting 2 vGPUs
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
diff --git a/examples/nvidia/specify_uuid_not_use.yaml b/examples/nvidia/specify_uuid_not_use.yaml
index 3255d7eb0..c129baf35 100644
--- a/examples/nvidia/specify_uuid_not_use.yaml
+++ b/examples/nvidia/specify_uuid_not_use.yaml
@@ -11,4 +11,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
\ No newline at end of file
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
\ No newline at end of file
diff --git a/examples/nvidia/specify_uuid_to_use.yaml b/examples/nvidia/specify_uuid_to_use.yaml
index c696958d6..35da6e256 100644
--- a/examples/nvidia/specify_uuid_to_use.yaml
+++ b/examples/nvidia/specify_uuid_to_use.yaml
@@ -11,4 +11,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
diff --git a/examples/nvidia/use_as_normal.yaml b/examples/nvidia/use_as_normal.yaml
index 015a857bc..326c425a1 100644
--- a/examples/nvidia/use_as_normal.yaml
+++ b/examples/nvidia/use_as_normal.yaml
@@ -9,7 +9,7 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
 ---
 apiVersion: v1
 kind: Pod
@@ -22,5 +22,5 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
 # gpu-pod1 and gpu-pod2 will NOT share the same GPU
\ No newline at end of file
diff --git a/examples/nvidia/use_exclusive_card.yaml b/examples/nvidia/use_exclusive_card.yaml
index d3abf5866..7891e6be9 100644
--- a/examples/nvidia/use_exclusive_card.yaml
+++ b/examples/nvidia/use_exclusive_card.yaml
@@ -9,6 +9,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          nvidia.com/gpumem-percentage: 100 # Each vGPU contains 100% of the entire GPU device memory (Optional,Integer)
-          nvidia.com/gpucores: 100 # Each vGPU uses 100% of the entire GPU cores(Optional,Integer)
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 100 # Identifies 100% GPU memory each physical GPU allocates to the pod (Optional,Integer)
+          nvidia.com/gpucores: 100 # Identifies 100% GPU cores each physical GPU allocates to the pod (Optional,Integer)
diff --git a/examples/nvidia/use_memory_fraction.yaml b/examples/nvidia/use_memory_fraction.yaml
index c507af772..c758d5f77 100644
--- a/examples/nvidia/use_memory_fraction.yaml
+++ b/examples/nvidia/use_memory_fraction.yaml
@@ -9,6 +9,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          nvidia.com/gpumem-percentage: 50 # Each vGPU contains 50% device memory of that GPU (Optional,Integer)
-          nvidia.com/gpucores: 30 # Each vGPU uses 30% of the entire GPU (Optional,Integer)
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 50 # Identifies 50% GPU memory each physical GPU allocates to the pod (Optional,Integer)
+          nvidia.com/gpucores: 30 # Identifies 30% GPU cores each physical GPU allocates to the pod (Optional,Integer)
diff --git a/examples/nvidia/use_sharing_card.yaml b/examples/nvidia/use_sharing_card.yaml
new file mode 100644
index 000000000..d052ce46f
--- /dev/null
+++ b/examples/nvidia/use_sharing_card.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-pod1
+spec:
+  containers:
+    - name: ubuntu-container
+      image: ubuntu:18.04
+      command: ["bash", "-c", "sleep 86400"]
+      resources:
+        limits:
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 40 # Identifies 40% GPU memory each physical GPU allocates to the pod (Optional,Integer)
+          nvidia.com/gpucores: 60 # Identifies 60% GPU cores each physical GPU allocates to the pod (Optional,Integer)
+
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-pod2
+spec:
+  containers:
+    - name: ubuntu-container
+      image: ubuntu:18.04
+      command: ["bash", "-c", "sleep 86400"]
+      resources:
+        limits:
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 60 # Identifies 60% GPU memory each physical GPU allocates to the pod (Optional,Integer)
+          nvidia.com/gpucores: 40 # Identifies 40% GPU cores each physical GPU allocates to the pod (Optional,Integer)
+
+# gpu-pod1 and gpu-pod2 could share the same GPU
\ No newline at end of file