diff --git a/helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/values.yaml b/helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/values.yaml index 6622f1cf..c54910e1 100644 --- a/helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/values.yaml +++ b/helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/values.yaml @@ -1,2 +1,32 @@ namespace: "aws-hyperpod" -hmaimage: "905418368575.dkr.ecr.us-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0" \ No newline at end of file + +# AWS region for the health monitoring agent ECR image +# The chart automatically detects the region from Kubernetes cluster context. +# Only specify this if you want to override the automatic detection. +# +# Automatic detection priority: +# 1. This explicit region setting (highest priority) +# 2. Global region setting (global.region) +# 3. Kubernetes cluster context detection: +# - EKS API server URL patterns +# - Node topology labels (topology.kubernetes.io/region) +# - AWS provider IDs in node specifications +# - Legacy region labels (failure-domain.beta.kubernetes.io/region) +# 4. Default fallback: us-west-2 +# +# Supported regions: us-east-1, us-west-2, us-east-2, us-west-1, eu-central-1, +# eu-north-1, eu-west-1, eu-west-2, ap-northeast-1, ap-south-1, ap-southeast-1, +# ap-southeast-2, sa-east-1 +region: "" + +# Image tag for health monitoring agent +# If not specified, uses global.imageTag or defaults to hardcoded version +imageTag: "" + +# Override the health monitoring agent image URI +# If specified, this will override the automatic region-based URI selection +# Example: "905418368575.dkr.ecr.us-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0" +hmaimage: "" + +# Enable debug output for region selection process +debug: true \ No newline at end of file diff --git a/helm_chart/readme.md b/helm_chart/readme.md index 2b6fe6e5..b6a47b48 100644 --- a/helm_chart/readme.md +++ b/helm_chart/readme.md @@ -171,19 +171,19 @@ helm upgrade dependencies helm_chart/HyperPodHelmChart --namespace kube-system - Training job auto resume is expected to work with Kubeflow training operator release v1.7.0, v1.8.0, v1.8.1 https://github.com/kubeflow/training-operator/releases - If you intend to use the Health Monitoring Agent container image from another region, please see below list to find relevant region's URI. ``` - IAD 767398015722.dkr.ecr.us-east-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - PDX 905418368575.dkr.ecr.us-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - CMH 851725546812.dkr.ecr.us-east-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - SFO 011528288828.dkr.ecr.us-west-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - FRA 211125453373.dkr.ecr.eu-central-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - ARN 654654141839.dkr.ecr.eu-north-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - DUB 533267293120.dkr.ecr.eu-west-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - LHR 011528288831.dkr.ecr.eu-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - NRT 533267052152.dkr.ecr.ap-northeast-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - BOM 011528288864.dkr.ecr.ap-south-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - SIN 905418428165.dkr.ecr.ap-southeast-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - SYD 851725636348.dkr.ecr.ap-southeast-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 - GRU 025066253954.dkr.ecr.sa-east-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0 + IAD 767398015722.dkr.ecr.us-east-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + PDX 905418368575.dkr.ecr.us-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + CMH 851725546812.dkr.ecr.us-east-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + SFO 011528288828.dkr.ecr.us-west-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + FRA 211125453373.dkr.ecr.eu-central-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + ARN 654654141839.dkr.ecr.eu-north-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + DUB 533267293120.dkr.ecr.eu-west-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + LHR 011528288831.dkr.ecr.eu-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + NRT 533267052152.dkr.ecr.ap-northeast-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + BOM 011528288864.dkr.ecr.ap-south-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + SIN 905418428165.dkr.ecr.ap-southeast-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + SYD 851725636348.dkr.ecr.ap-southeast-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 + GRU 025066253954.dkr.ecr.sa-east-1.amazonaws.com/hyperpod-health-monitoring-agent:1.0.448.0_1.0.115.0 ``` ## 7. Troubleshooting