diff --git a/alerts/apps_alerts.libsonnet b/alerts/apps_alerts.libsonnet index 4359b1a77..754346abb 100644 --- a/alerts/apps_alerts.libsonnet +++ b/alerts/apps_alerts.libsonnet @@ -1,3 +1,5 @@ +local utils = import '../lib/utils.libsonnet'; + { _config+:: { kubeStateMetricsSelector: error 'must provide selector for kube-state-metrics', @@ -10,7 +12,8 @@ groups+: [ { name: 'kubernetes-apps', - rules: [ + rules: [utils.wrap_rule_for_labels(rule, $._config) for rule in self.rules_], + rules_:: [ { expr: ||| max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", %(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m]) >= 1 diff --git a/config.libsonnet b/config.libsonnet index 4843e29bb..4b2615ed5 100644 --- a/config.libsonnet +++ b/config.libsonnet @@ -32,6 +32,16 @@ windowsExporterSelector: 'job="kubernetes-windows-exporter"', containerfsSelector: 'container!=""', + // Lists of labels to join onto rules, one list per kind of resource + // Only works if your environment exposes the kube_%s_labels metrics (e.g. kube_pod_labels). 
+ common_join_labels: [], + pods_join_labels: $._config.common_join_labels, + statefulsets_join_labels: $._config.common_join_labels, + deployments_join_labels: $._config.common_join_labels, + daemonsets_join_labels: $._config.common_join_labels, + horizontalpodautoscalers_join_labels: $._config.common_join_labels, + jobs_join_labels: $._config.common_join_labels, + // Grafana dashboard IDs are necessary for stable links for dashboards grafanaDashboardIDs: { 'apiserver.json': std.md5('apiserver.json'), diff --git a/lib/utils.libsonnet b/lib/utils.libsonnet index a8c4df41c..512d6e68f 100644 --- a/lib/utils.libsonnet +++ b/lib/utils.libsonnet @@ -15,4 +15,48 @@ if s > 60 * 60 * 24 then '%.1f days' % (s / 60 / 60 / 24) else '%.1f hours' % (s / 60 / 60), + + // Joins extra labels into a rule by wrapping its expr as: (<expr>) * on (<on labels>) group_left(<join labels>) kube_<kind>_labels + // rule: rule object; an optional `kind` field (may be hidden) selects the resource kind, otherwise the kind is detected from the `alert` name prefix + // config: config object; reads `<kind>s_join_labels`, `namespaceLabel` and `clusterLabel` + // Returns the rule unchanged when the kind cannot be identified or has no join labels configured + wrap_rule_for_labels(rule, config): + // Detect the kind of rule from its alert name unless a `kind` field (possibly hidden) is passed in the rule + local kind = + if 'kind' in rule then rule.kind + // Handle Alerts + else if std.objectHas(rule, 'alert') then + if std.startsWith(rule.alert, 'KubePod') then 'pod' + else if std.startsWith(rule.alert, 'KubeContainer') then 'pod' + else if std.startsWith(rule.alert, 'KubeStateful') then 'statefulset' + else if std.startsWith(rule.alert, 'KubeDeploy') then 'deployment' + else if std.startsWith(rule.alert, 'KubeDaemon') then 'daemonset' + else if std.startsWith(rule.alert, 'KubeHpa') then 'horizontalpodautoscaler' + else if std.startsWith(rule.alert, 'KubeJob') then 'job' + else 'none' + else 'none'; + + local labels = { + // A kind without a matching `<kind>s_join_labels` config field falls back to no join labels, so the raw rule is returned instead of failing on a missing field + join_labels: if std.objectHasAll(config, '%ss_join_labels' % kind) then config['%ss_join_labels' % kind] else [], + // since the label 'job' is reserved, the resource with kind Job uses the label 'job_name' instead + on_labels: ['%s' % (if kind == 'job' then 'job_name' else kind), '%s' % config.namespaceLabel, '%s' % config.clusterLabel], + metric: 
'kube_%s_labels' % kind, + }; + + // Failed to identify kind - return raw rule + if kind == 'none' then rule + // No join labels available for this kind in the config - return raw rule + else if std.length(labels.join_labels) == 0 then rule + // Wrap expr with join group left + else + rule { + local expr = super.expr, + expr: '(%(expr)s) * on (%(on)s) group_left(%(join)s) %(metric)s' % { + expr: expr, + on: std.join(',', labels.on_labels), + join: std.join(',', labels.join_labels), + metric: labels.metric, + }, + }, }