Skip to content

Commit

Permalink
Add missing cluster labels and aggregations for apiserver alerts
Browse files (browse the repository at this point in the history)
Also, remove unused variable kubeAPILatencyWarningSeconds, which was not deleted in #451

Signed-off-by: Roman Hros <[email protected]>
  • Loading branch information
chess-knight committed Feb 2, 2024
1 parent bf3acbd commit e80710c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
17 changes: 9 additions & 8 deletions alerts/kube_apiserver.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ local utils = import '../lib/utils.libsonnet';
_config+:: {
kubeApiserverSelector: error 'must provide selector for kube-apiserver',

kubeAPILatencyWarningSeconds: 1,

certExpirationWarningSeconds: 7 * 24 * 3600,
certExpirationCriticalSeconds: 1 * 24 * 3600,
},
Expand All @@ -18,13 +16,16 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeAPIErrorBudgetBurn',
expr: |||
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
and
sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
sum by(%s) (apiserver_request:burnrate%s) > (%.2f * %.5f)
and on(%s)
sum by(%s) (apiserver_request:burnrate%s) > (%.2f * %.5f)
||| % [
$._config.clusterLabel,
w.long,
w.factor,
(1 - $._config.SLOs.apiserver.target),
$._config.clusterLabel,
$._config.clusterLabel,
w.short,
w.factor,
(1 - $._config.SLOs.apiserver.target),
Expand All @@ -49,7 +50,7 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeClientCertificateExpiration',
expr: |||
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(%(clusterLabel)s, job) histogram_quantile(0.01, sum by (%(clusterLabel)s, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
||| % $._config,
'for': '5m',
labels: {
Expand All @@ -63,7 +64,7 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeClientCertificateExpiration',
expr: |||
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(%(clusterLabel)s, job) histogram_quantile(0.01, sum by (%(clusterLabel)s, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
||| % $._config,
'for': '5m',
labels: {
Expand Down Expand Up @@ -108,7 +109,7 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeAPITerminatedRequests',
expr: |||
sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum(rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
sum by(%(clusterLabel)s) (rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum by(%(clusterLabel)s) (rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum by(%(clusterLabel)s) (rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
||| % $._config,
labels: {
severity: 'warning',
Expand Down
5 changes: 3 additions & 2 deletions tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1125,9 +1125,9 @@ tests:

- interval: 1m
input_series:
- series: 'apiserver_request_terminations_total{job="kube-apiserver",apiserver="kube-apiserver"}'
- series: 'apiserver_request_terminations_total{cluster="kubernetes",job="kube-apiserver",apiserver="kube-apiserver"}'
values: '1+1x10'
- series: 'apiserver_request_total{job="kube-apiserver",apiserver="kube-apiserver"}'
- series: 'apiserver_request_total{cluster="kubernetes",job="kube-apiserver",apiserver="kube-apiserver"}'
values: '1+2x10'
alert_rule_test:
- eval_time: 5m # alert hasn't fired
Expand All @@ -1137,6 +1137,7 @@ tests:
exp_alerts:
- exp_labels:
severity: warning
cluster: "kubernetes"
exp_annotations:
summary: "The kubernetes apiserver has terminated 33.33% of its incoming requests."
description: "The kubernetes apiserver has terminated 33.33% of its incoming requests."
Expand Down

0 comments on commit e80710c

Please sign in to comment.