From d6ab1a7cd86a7fe687284f81b385ad0a82cbc70e Mon Sep 17 00:00:00 2001 From: Sebastian Gaiser Date: Tue, 10 Dec 2024 19:24:32 +0100 Subject: [PATCH] fix(alerts): add instance label to KubeAggregatedAPIErrors (#991) * fix(alerts): use sum by instance max for 'KubeAggregatedAPIErrors' Signed-off-by: Sebastian Gaiser * Update alerts/kube_apiserver.libsonnet Co-authored-by: Stephen Lang * Update alerts/kube_apiserver.libsonnet * Update alerts/kube_apiserver.libsonnet * fix(alerts): use sum by instance max for 'KubeAggregatedAPIErrors' - adjust description Signed-off-by: Sebastian Gaiser * fix(build): remove extra parenthesis Signed-off-by: Stephen Lang --------- Signed-off-by: Sebastian Gaiser Signed-off-by: Stephen Lang Co-authored-by: Stephen Lang Co-authored-by: Stephen Lang --- alerts/kube_apiserver.libsonnet | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/alerts/kube_apiserver.libsonnet b/alerts/kube_apiserver.libsonnet index 558c2acd2..d67bbf3ed 100644 --- a/alerts/kube_apiserver.libsonnet +++ b/alerts/kube_apiserver.libsonnet @@ -88,13 +88,16 @@ local utils = import '../lib/utils.libsonnet'; { alert: 'KubeAggregatedAPIErrors', expr: ||| - sum by(name, namespace, %(clusterLabel)s)(increase(aggregator_unavailable_apiservice_total{%(kubeApiserverSelector)s}[10m])) > 4 + sum by(%(clusterLabel)s, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{%(kubeApiserverSelector)s}[1m])) > 0 ||| % $._config, + 'for': '10m', labels: { severity: 'warning', }, annotations: { - description: 'Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.', + description: 'Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name }} has reported {{ $labels.reason }} errors%s.' 
% [ + utils.ifShowMultiCluster($._config, ' on cluster {{ $labels.%(clusterLabel)s }}' % $._config), + ], summary: 'Kubernetes aggregated API has reported errors.', }, },