diff --git a/CHANGELOG.md b/CHANGELOG.md index ca933df2..c9965c26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Move SLO reporting dashboard to be public. +- Get rid of the `app` label in Atlas dashboards. ### Fixed @@ -136,7 +137,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Add a CAPA aggregated error logs dashboard. - + ## [3.10.4] - 2024-04-10 ### Fixed @@ -579,7 +580,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Add AWS ENA Performance +- Add AWS ENA Performance - Updated team labels for team-rocket - Add graph in Node Overview to identify emptydir growth - Update kube-mixins to 0.12 diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json index d9c0a838..f54cc036 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json @@ -2620,7 +2620,7 @@ "uid": "$datasource" }, "editorMode": "code", - "expr": "1-\n(\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-write-.*\"})\n /\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-write-.*\"})\n)\n", + "expr": "1-\n(\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster_id=\"$cluster_id\", job=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-write-.*\"})\n /\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster_id=\"$cluster_id\", job=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-write-.*\"})\n)\n", "hide": false, "instant": false, "legendFormat": "{{persistentvolumeclaim}}", @@ -4246,7 +4246,7 @@ "uid": "$datasource" }, "editorMode": "code", - "expr": "1-\n(\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-backend-.*\"})\n /\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-backend-.*\"})\n)\n", + "expr": "1-\n(\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster_id=\"$cluster_id\", job=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-backend-.*\"})\n /\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster_id=\"$cluster_id\", job=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-backend-.*\"})\n)\n", "hide": false, "instant": false, "legendFormat": "{{persistentvolumeclaim}}", diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/operatorkit.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/operatorkit.json index 424dbbf1..01026cf1 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/operatorkit.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/operatorkit.json @@ -170,7 +170,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(operatorkit_controller_operation_bucket{app=\"$app\", operation=\"$operation\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(operatorkit_controller_operation_bucket{job=\"$job\", operation=\"$operation\"}[$__rate_interval])) by (le)", "format": "heatmap", "instant": false, "legendFormat": "__auto", @@ -287,7 +287,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "histogram_quantile(0.99, sum(irate(operatorkit_controller_operation_bucket{app=\"$app\", operation=\"$operation\"}[5m])) by (app, event, le))", + "expr": "histogram_quantile(0.99, sum(irate(operatorkit_controller_operation_bucket{job=\"$job\", operation=\"$operation\"}[5m])) by (job, event, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "99%", @@ -300,7 +300,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "histogram_quantile(0.9, sum(irate(operatorkit_controller_operation_bucket{app=\"$app\", operation=\"$operation\"}[5m])) by (app, operation, le))", + "expr": "histogram_quantile(0.9, sum(irate(operatorkit_controller_operation_bucket{job=\"$job\", operation=\"$operation\"}[5m])) by (job, operation, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "90%", @@ -312,7 +312,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "histogram_quantile(0.75, sum(irate(operatorkit_controller_operation_bucket{app=\"$app\", operation=\"$operation\"}[5m])) by (app, operation, le))", + "expr": "histogram_quantile(0.75, sum(irate(operatorkit_controller_operation_bucket{job=\"$job\", operation=\"$operation\"}[5m])) by (job, operation, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "75%", @@ -429,7 +429,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(operatorkit_controller_event_bucket{app=\"$app\", event=\"$event\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(operatorkit_controller_event_bucket{job=\"$job\", event=\"$event\"}[$__rate_interval])) by (le)", "format": "heatmap", "instant": false, "legendFormat": "__auto", @@ -546,7 +546,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "histogram_quantile(0.99, sum(irate(operatorkit_controller_event_bucket{app=\"$app\", event=\"$event\"}[5m])) by (app, event, le))", + "expr": "histogram_quantile(0.99, sum(irate(operatorkit_controller_event_bucket{job=\"$job\", event=\"$event\"}[5m])) by (job, event, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "99%", @@ -559,7 +559,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "histogram_quantile(0.9, sum(irate(operatorkit_controller_event_bucket{app=\"$app\", event=\"$event\"}[5m])) by (app, event, le))", + "expr": "histogram_quantile(0.9, sum(irate(operatorkit_controller_event_bucket{job=\"$job\", event=\"$event\"}[5m])) by (job, event, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "90%", @@ -571,7 +571,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "histogram_quantile(0.75, sum(irate(operatorkit_controller_event_bucket{app=\"$app\", event=\"$event\"}[5m])) by (app, event, le))", + "expr": "histogram_quantile(0.75, sum(irate(operatorkit_controller_event_bucket{job=\"$job\", event=\"$event\"}[5m])) by (job, event, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "75%", @@ -687,7 +687,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(operatorkit_controller_error_total{app=\"$app\"}) by (instance)", + "expr": "sum(operatorkit_controller_error_total{job=\"$job\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -703,7 +703,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(operatorkit_controller_errors_total{app=\"$app\"}) by (pod)", + "expr": "sum(operatorkit_controller_errors_total{job=\"$job\"}) by (pod)", "hide": false, "legendFormat": "pod {{pod}}", "range": true, @@ -807,7 +807,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(changes(giantswarm_build_info{app=\"$app\"}[5m])) by (commit, golang_version, version) + 1", + "expr": "sum(changes(giantswarm_build_info{job=\"$job\"}[5m])) by (commit, golang_version, version) + 1", "format": "table", "intervalFactor": 2, "legendFormat": "{{version}} - {{golang_version}} - {{commit}}", @@ -910,14 +910,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(operatorkit_controller_event_count, app)", + "definition": "label_values(operatorkit_controller_event_count, job)", "hide": 0, "includeAll": false, "multi": false, - "name": "app", + "name": "job", "options": [], "query": { - "query": "label_values(operatorkit_controller_event_count, app)", + "query": "label_values(operatorkit_controller_event_count, job)", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -940,14 +940,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(operatorkit_controller_event_count{app=\"$app\"}, event)", + "definition": "label_values(operatorkit_controller_event_count{job=\"$job\"}, event)", "hide": 0, "includeAll": true, "multi": true, "name": "event", "options": [], "query": { - "query": "label_values(operatorkit_controller_event_count{app=\"$app\"}, event)", + "query": "label_values(operatorkit_controller_event_count{job=\"$job\"}, event)", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -970,14 +970,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(operatorkit_controller_operation_count{app=\"$app\"}, operation)", + "definition": "label_values(operatorkit_controller_operation_count{job=\"$job\"}, operation)", "hide": 0, "includeAll": true, "multi": true, "name": "operation", "options": [], "query": { - "query": "label_values(operatorkit_controller_operation_count{app=\"$app\"}, operation)", + "query": "label_values(operatorkit_controller_operation_count{job=\"$job\"}, operation)", "refId": "StandardVariableQuery" }, "refresh": 2, diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/prometheus-cost-estimation.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/prometheus-cost-estimation.json index 932f056f..2321460e 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/prometheus-cost-estimation.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/prometheus-cost-estimation.json @@ -750,7 +750,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "max (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n ) by (persistentvolumeclaim)", + "expr": "max (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n ) by (persistentvolumeclaim)", "legendFormat": "{{persistentvolumeclaim}}", "range": true, "refId": "A" @@ -761,7 +761,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(max (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n ) by (persistentvolumeclaim))", + "expr": "sum(max (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n ) by (persistentvolumeclaim))", "hide": false, "legendFormat": "Total space used", "range": true, @@ -830,7 +830,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(max (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n ) by (persistentvolumeclaim))", + "expr": "sum(max (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n ) by (persistentvolumeclaim))", "legendFormat": "__auto", "range": true, "refId": "A" diff --git a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/pod-request-vs-usage.json b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/pod-request-vs-usage.json index 8cb807e5..1f3675ea 100644 --- a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/pod-request-vs-usage.json +++ b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/pod-request-vs-usage.json @@ -150,7 +150,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sort(\n bottomk(10,\n max(\n label_replace(\n avg_over_time(\n container_memory_working_set_bytes{app=\"cadvisor\", cluster_id=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\", pod=~\"($container).*\"}[$__range]\n ),\"shortpod\",\"$1\",\"pod\",\"(.*)-.*\"\n )\n ) by (shortpod)\n /\n max(\n label_replace(\n cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster_id=\"$cluster\", namespace=\"$namespace\", pod=~\"($container).*\"}\n ,\"shortpod\",\"$1\",\"pod\",\"(.*)-.*\"\n )\n ) by (shortpod)\n *100)\n)", + "expr": "sort(\n bottomk(10,\n max(\n label_replace(\n avg_over_time(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster_id=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\", pod=~\"($container).*\"}[$__range]\n ),\"shortpod\",\"$1\",\"pod\",\"(.*)-.*\"\n )\n ) by (shortpod)\n /\n max(\n label_replace(\n cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster_id=\"$cluster\", namespace=\"$namespace\", pod=~\"($container).*\"}\n ,\"shortpod\",\"$1\",\"pod\",\"(.*)-.*\"\n )\n ) by (shortpod)\n *100)\n)", "hide": false, "instant": true, "interval": "", @@ -383,7 +383,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "max(label_replace(container_memory_working_set_bytes{app=\"cadvisor\", cluster_id=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\", pod=~\"($container).*\"},\"shortpod\",\"$1\",\"pod\",\"(.*)-.*\")) by (shortpod)", + "expr": "max(label_replace(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster_id=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\", pod=~\"($container).*\"},\"shortpod\",\"$1\",\"pod\",\"(.*)-.*\")) by (shortpod)", "interval": "", "legendFormat": "usage for {{shortpod}}", "range": true, @@ -432,7 +432,7 @@ "type": "prometheus", "uid": "$datasource" }, - "definition": "label_values(up{app=\"kube-state-metrics\"}, cluster_id)", + "definition": "label_values(up{job=\"kube-state-metrics\"}, cluster_id)", "hide": 0, "includeAll": false, "label": "Cluster", @@ -440,7 +440,7 @@ "name": "cluster", "options": [], "query": { - "query": "label_values(up{app=\"kube-state-metrics\"}, cluster_id)", + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster_id)", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -462,14 +462,14 @@ "type": "prometheus", "uid": "$datasource" }, - "definition": "label_values(kube_namespace_status_phase{app=\"kube-state-metrics\", cluster_id=\"$cluster\"}, namespace)", + "definition": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster_id=\"$cluster\"}, namespace)", "hide": 0, "includeAll": false, "multi": false, "name": "namespace", "options": [], "query": { - "query": "label_values(kube_namespace_status_phase{app=\"kube-state-metrics\", cluster_id=\"$cluster\"}, namespace)", + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster_id=\"$cluster\"}, namespace)", "refId": "StandardVariableQuery" }, "refresh": 2, diff --git a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json index f9e8ea08..577e10cc 100644 --- a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json +++ b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json @@ -36,7 +36,7 @@ "uid": "P8E80F9AEF21F6940" }, "enable": true, - "expr": "{app=\"prometheus\", instance=~\"$cluster\"} |= `Starting Prometheus Server`", + "expr": "{container=\"prometheus\", instance=~\"$cluster\"} |= `Starting Prometheus Server`", "iconColor": "green", "instant": false, "name": "Prometheus Start logs", @@ -115,7 +115,7 @@ "uid": "P8E80F9AEF21F6940" }, "editorMode": "code", - "expr": "{app=\"prometheus\", instance=~\"$cluster\"}", + "expr": "{container=\"prometheus\", instance=~\"$cluster\"}", "queryType": "range", "refId": "A" } @@ -568,7 +568,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "prometheus_rule_evaluation_duration_seconds{app=\"prometheus\",quantile=\"0.99\", cluster_id=~\"($cluster)\"}", + "expr": "prometheus_rule_evaluation_duration_seconds{job=~\"($cluster)-prometheus.*\", quantile=\"0.99\", cluster_id=~\"($cluster)\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster_id}} - 0.99 quantile", @@ -582,7 +582,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "prometheus_rule_evaluation_duration_seconds{app=\"prometheus\",quantile=\"0.9\", cluster_id=~\"($cluster)\"}", + "expr": "prometheus_rule_evaluation_duration_seconds{job=~\"($cluster)-prometheus.*\", quantile=\"0.9\", cluster_id=~\"($cluster)\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster_id}} - 0.9 quantile", @@ -596,7 +596,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "prometheus_rule_evaluation_duration_seconds{app=\"prometheus\",quantile=\"0.5\", cluster_id=~\"($cluster)\"}", + "expr": "prometheus_rule_evaluation_duration_seconds{job=~\"($cluster)-prometheus.*\", quantile=\"0.5\", cluster_id=~\"($cluster)\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster_id}} - 0.5 quantile", @@ -701,7 +701,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "prometheus_notifications_latency_seconds{app=\"prometheus\",quantile=\"0.99\", cluster_id=~\"($cluster)\"}", + "expr": "prometheus_notifications_latency_seconds{job=~\"($cluster)-prometheus.*\", quantile=\"0.99\", cluster_id=~\"($cluster)\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster_id}} - 0.99 quantile", @@ -715,7 +715,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "prometheus_notifications_latency_seconds{app=\"prometheus\",quantile=\"0.9\", cluster_id=~\"($cluster)\"}", + "expr": "prometheus_notifications_latency_seconds{job=~\"($cluster)-prometheus.*\", quantile=\"0.9\", cluster_id=~\"($cluster)\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster_id}} - 0.9 quantile", @@ -729,7 +729,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "prometheus_notifications_latency_seconds{app=\"prometheus\",quantile=\"0.5\", cluster_id=~\"($cluster)\"}", + "expr": "prometheus_notifications_latency_seconds{job=~\"($cluster)-prometheus.*\", quantile=\"0.5\", cluster_id=~\"($cluster)\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster_id}} - 0.5 quantile", @@ -1389,7 +1389,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "max (\n (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n )\n / kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n * 100\n) by (persistentvolumeclaim)", + "expr": "max (\n (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n )\n / kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", job=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n * 100\n) by (persistentvolumeclaim)", "legendFormat": "{{persistentvolumeclaim}}", "range": true, "refId": "A" @@ -1416,7 +1416,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "Number of samples scraped by app / job.\n\nShows which jobs scrape the most data.\n\nMetric: `scrape_samples_scraped`", + "description": "Number of samples scraped by job.\n\nShows which jobs scrape the most data.\n\nMetric: `scrape_samples_scraped`", "fieldConfig": { "defaults": { "color": { @@ -1475,9 +1475,9 @@ }, "editorMode": "code", "exemplar": false, - "expr": "topk(10, sum(scrape_samples_scraped{cluster_id=~\"$cluster\"}) by (app, job, cluster_id))", + "expr": "topk(10, sum(scrape_samples_scraped{cluster_id=~\"$cluster\"}) by (job, cluster_id))", "instant": true, - "legendFormat": "{{app}} | {{job}} | {{cluster_id}}", + "legendFormat": "{{job}} | {{cluster_id}}", "range": false, "refId": "A" } @@ -1625,7 +1625,7 @@ "exemplar": false, "expr": "topk(10, max_over_time(scrape_series_added{cluster_id=~\"$cluster\"}[$__rate_interval]))", "instant": true, - "legendFormat": "{{job}} | {{app}} | {{cluster_id}}", + "legendFormat": "{{job}} | {{cluster_id}}", "range": false, "refId": "A" } diff --git a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-details.json b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-details.json index 85d5c555..39f11f0f 100644 --- a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-details.json +++ b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-details.json @@ -273,7 +273,6 @@ "options": { "include": { "names": [ - "app", "code", "container", "endpoint", @@ -380,7 +379,6 @@ "options": { "include": { "names": [ - "app", "container", "endpoint", "installation", diff --git a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-overview.json b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-overview.json index 03c6c91f..a1704622 100644 --- a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-overview.json +++ b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/servicemonitors-overview.json @@ -183,9 +183,9 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(scrape_samples_post_metric_relabeling{cluster_id=~\"$cluster\", job=~\"$job\"}) by (app, job, cluster_id)", + "expr": "sum(scrape_samples_post_metric_relabeling{cluster_id=~\"$cluster\", job=~\"$job\"}) by (job, cluster_id)", "instant": false, - "legendFormat": "Job: {{job}} / App: {{app}}) / Cluster: {{cluster_id}}", + "legendFormat": "Job: {{job}} / Cluster: {{cluster_id}}", "range": true, "refId": "A" } @@ -391,9 +391,9 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(max_over_time(scrape_series_added{cluster_id=~\"$cluster\", job=~\"$job\"}[$__interval])) by (cluster_id, job, app)", + "expr": "sum(max_over_time(scrape_series_added{cluster_id=~\"$cluster\", job=~\"$job\"}[$__interval])) by (cluster_id, job)", "instant": false, - "legendFormat": "Job: {{job}} / App: {{app}} / Cluster: {{cluster_id}}", + "legendFormat": "Job: {{job}} / Cluster: {{cluster_id}}", "range": true, "refId": "A" } @@ -495,9 +495,9 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(scrape_samples_scraped{cluster_id=~\"$cluster\", job=~\"$job\"} - scrape_samples_post_metric_relabeling{cluster_id=~\"$cluster\", job=~\"$job\"}) by (app, job, cluster_id)", + "expr": "sum(scrape_samples_scraped{cluster_id=~\"$cluster\", job=~\"$job\"} - scrape_samples_post_metric_relabeling{cluster_id=~\"$cluster\", job=~\"$job\"}) by (job, cluster_id)", "instant": false, - "legendFormat": "Job: {{job}} / App: {{app}}) / Cluster: {{cluster_id}}", + "legendFormat": "Job: {{job}} / Cluster: {{cluster_id}}", "range": true, "refId": "A" } @@ -510,7 +510,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "Duration of the scrape. Is the sum of all targets for one job/app on a cluster.", + "description": "Duration of the scrape. Is the sum of all targets for one job on a cluster.", "fieldConfig": { "defaults": { "color": { @@ -599,9 +599,9 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(scrape_duration_seconds{cluster_id=~\"$cluster\", job=~\"$job\"}) by (cluster_id, job, app)", + "expr": "sum(scrape_duration_seconds{cluster_id=~\"$cluster\", job=~\"$job\"}) by (cluster_id, job)", "instant": false, - "legendFormat": "Job: {{job}} / App: {{app}} / Cluster: {{cluster_id}}", + "legendFormat": "Job: {{job}} / Cluster: {{cluster_id}}", "range": true, "refId": "A" }