From c29f8ed6d85f7197b6dc564a9c0a30df5c909b72 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Fri, 3 Oct 2025 12:29:59 +0100 Subject: [PATCH 1/8] Try to expose the GPU count --- charts/azimuth-chat/azimuth-ui.schema.yaml | 1 + charts/azimuth-llm/values.schema.json | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml index 74bd573c..a1f14887 100644 --- a/charts/azimuth-chat/azimuth-ui.schema.yaml +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -24,6 +24,7 @@ sortOrder: - /azimuth-llm/ui/appSettings/model_instruction - /azimuth-llm/ui/appSettings/page_title - /azimuth-llm/api/image/version + - /azimuth-llm/api/gpus - /azimuth-llm/ui/appSettings/llm_params/temperature - /azimuth-llm/ui/appSettings/llm_params/max_tokens - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json index 59e0e1b8..c13caea6 100644 --- a/charts/azimuth-llm/values.schema.json +++ b/charts/azimuth-llm/values.schema.json @@ -40,6 +40,14 @@ "default": "v0.10.2" } } + }, + "gpus": { + "type": "integer", + "title": "GPU Count", + "description": "The number of GPUs to allocate to the model.", + "default": 1, + "minimum": 1, + "maximum": 8 } } }, From 96bd4ae847b5771aac70a32c7fb3d9e2dc819dac Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Fri, 3 Oct 2025 14:59:20 +0100 Subject: [PATCH 2/8] Try to set --tensor-parallel-size based on GPU count --- charts/azimuth-llm/templates/api/deployment.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 0e6206d5..00bc9af5 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -47,6 +47,10 @@ spec: {{- if .Values.api.extraArgs -}} {{- .Values.api.extraArgs | toYaml | nindent 10 }} {{- end -}} + {{- if gt .Values.api.gpus 1 }} + - --tensor-parallel-size + - {{ .Values.api.gpus }} + {{- end }} {{- if .Values.huggingface.secretName -}} envFrom: - secretRef: From 0ef9bb02e5e1848269ec3badf5439650d26ab90d Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Thu, 16 Oct 2025 14:21:37 +0100 Subject: [PATCH 3/8] cast gpu counts --- charts/azimuth-llm/templates/api/deployment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 00bc9af5..1e951d98 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -47,9 +47,9 @@ spec: {{- if .Values.api.extraArgs -}} {{- .Values.api.extraArgs | toYaml | nindent 10 }} {{- end -}} - {{- if gt .Values.api.gpus 1 }} + {{- if gt (.Values.api.gpus | int) 1 }} - --tensor-parallel-size - - {{ .Values.api.gpus }} + - {{ (.Values.api.gpus | int) }} {{- end }} {{- if .Values.huggingface.secretName -}} envFrom: From 98151ddb0ee42c6987ca2f4011aa8b6d2caaad99 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Thu, 16 Oct 2025 14:56:01 +0100 Subject: [PATCH 4/8] Added GPUs to chat schema --- charts/azimuth-chat/values.schema.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json index 0fb9ee0d..780b6603 100644 --- a/charts/azimuth-chat/values.schema.json +++ b/charts/azimuth-chat/values.schema.json @@ -43,6 +43,14 @@ "default": "v0.10.2" } } + }, + "gpus": { + "type": "integer", + "title": "GPU Count", + "description": "The number of GPUs to allocate to the model.", + "default": 1, + "minimum": 1, + "maximum": 8 } } }, From aeec064a5e26955caba1bd0447fdc563403ec501 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Thu, 16 Oct 2025 15:13:53 +0100 Subject: [PATCH 5/8] revert to string in args --- charts/azimuth-llm/templates/api/deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 1e951d98..8feb0136 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -49,7 +49,7 @@ spec: {{- end -}} {{- if gt (.Values.api.gpus | int) 1 }} - --tensor-parallel-size - - {{ (.Values.api.gpus | int) }} + - {{ .Values.api.gpus }} {{- end }} {{- if .Values.huggingface.secretName -}} envFrom: From d7196adc5188f3462eb2f184fdbc3025733ae9fd Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Thu, 16 Oct 2025 15:20:44 +0100 Subject: [PATCH 6/8] add quote to args --- charts/azimuth-llm/templates/api/deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml index 8feb0136..074dbd69 100644 --- a/charts/azimuth-llm/templates/api/deployment.yml +++ b/charts/azimuth-llm/templates/api/deployment.yml @@ -49,7 +49,7 @@ spec: {{- end -}} {{- if gt (.Values.api.gpus | int) 1 }} - --tensor-parallel-size - - {{ .Values.api.gpus }} + - {{ .Values.api.gpus | quote }} {{- end }} {{- if .Values.huggingface.secretName -}} envFrom: From fa22b9cf8d1b616506f38573586a73e54d30b6e3 Mon Sep 17 00:00:00 2001 From: wtripp180901 <78219569+wtripp180901@users.noreply.github.com> Date: Mon, 20 Oct 2025 16:05:23 +0100 Subject: [PATCH 7/8] Set minimum GPUs to zero in schema --- charts/azimuth-chat/values.schema.json | 2 +- charts/azimuth-llm/values.schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json index 780b6603..414242b7 100644 --- a/charts/azimuth-chat/values.schema.json +++ b/charts/azimuth-chat/values.schema.json @@ -49,7 +49,7 @@ "title": "GPU Count", "description": "The number of GPUs to allocate to the model.", "default": 1, - "minimum": 1, + "minimum": 0, "maximum": 8 } } diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json index c13caea6..1a3e4895 100644 --- a/charts/azimuth-llm/values.schema.json +++ b/charts/azimuth-llm/values.schema.json @@ -46,7 +46,7 @@ "title": "GPU Count", "description": "The number of GPUs to allocate to the model.", "default": 1, - "minimum": 1, + "minimum": 0, "maximum": 8 } } From a2ccd846968368d0710f4a879b3569454cce6613 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Thu, 30 Oct 2025 10:47:51 +0000 Subject: [PATCH 8/8] Updated schema descriptions --- charts/azimuth-chat/values.schema.json | 2 +- charts/azimuth-llm/values.schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json index 414242b7..4b64ad8f 100644 --- a/charts/azimuth-chat/values.schema.json +++ b/charts/azimuth-chat/values.schema.json @@ -47,7 +47,7 @@ "gpus": { "type": "integer", "title": "GPU Count", - "description": "The number of GPUs to allocate to the model.", + "description": "The number of GPUs (within a single node) to allocate to the model.", "default": 1, "minimum": 0, "maximum": 8 diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json index 1a3e4895..9a7bae50 100644 --- a/charts/azimuth-llm/values.schema.json +++ b/charts/azimuth-llm/values.schema.json @@ -44,7 +44,7 @@ "gpus": { "type": "integer", "title": "GPU Count", - "description": "The number of GPUs to allocate to the model.", + "description": "The number of GPUs (within a single node) to allocate to the model.", "default": 1, "minimum": 0, "maximum": 8