From de8329f2b672920421fa49f5be21050aff58e3a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Thu, 23 Nov 2023 19:45:25 +0100 Subject: [PATCH 1/6] Storage Alerts: Missing cluster label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- alerts/storage_alerts.libsonnet | 26 +++++++++++++------------- rules/node.libsonnet | 4 ++-- tests.yaml | 18 +++++++++--------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/alerts/storage_alerts.libsonnet b/alerts/storage_alerts.libsonnet index ce028683f..ebeed2b85 100644 --- a/alerts/storage_alerts.libsonnet +++ b/alerts/storage_alerts.libsonnet @@ -26,9 +26,9 @@ ) < 0.03 and kubelet_volume_stats_used_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1m', @@ -36,7 +36,7 @@ severity: 'critical', }, annotations: { - description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.', + description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} is only {{ $value | humanizePercentage }} free.' % $._config, summary: 'PersistentVolume is filling up.', }, }, @@ -52,9 +52,9 @@ kubelet_volume_stats_used_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 and predict_linear(kubelet_volume_stats_available_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}[%(volumeFullPredictionSampleTime)s], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1h', @@ -62,7 +62,7 @@ severity: 'warning', }, annotations: { - description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.', + description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.' 
% $._config, summary: 'PersistentVolume is filling up.', }, }, @@ -76,9 +76,9 @@ ) < 0.03 and kubelet_volume_stats_inodes_used{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1m', @@ -86,7 +86,7 @@ severity: 'critical', }, annotations: { - description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes.', + description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} only has {{ $value | humanizePercentage }} free inodes.' % $._config, summary: 'PersistentVolumeInodes are filling up.', }, }, @@ -102,9 +102,9 @@ kubelet_volume_stats_inodes_used{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 and predict_linear(kubelet_volume_stats_inodes_free{%(prefixedNamespaceSelector)s%(kubeletSelector)s}[%(volumeFullPredictionSampleTime)s], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1h', @@ -112,7 +112,7 @@ severity: 'warning', }, annotations: { - description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.', + description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.' % $._config, summary: 'PersistentVolumeInodes are filling up.', }, }, @@ -126,7 +126,7 @@ severity: 'critical', }, annotations: { - description: 'The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.', + description: 'The persistent volume {{ $labels.persistentvolume }} on Cluster {{ $labels.%(clusterLabel)s }} has status {{ $labels.phase }}.' 
% $._config, summary: 'PersistentVolume is having issues with provisioning.', }, }, diff --git a/rules/node.libsonnet b/rules/node.libsonnet index e4e949bd7..453e3720b 100644 --- a/rules/node.libsonnet +++ b/rules/node.libsonnet @@ -32,8 +32,8 @@ expr: ||| count by (%(clusterLabel)s, node) ( node_cpu_seconds_total{mode="idle",%(nodeExporterSelector)s} - * on (namespace, %(podLabel)s) group_left(node) - topk by(namespace, %(podLabel)s) (1, node_namespace_pod:kube_pod_info:) + * on (%(clusterLabel)s, namespace, %(podLabel)s) group_left(node) + topk by(%(clusterLabel)s, namespace, %(podLabel)s) (1, node_namespace_pod:kube_pod_info:) ) ||| % $._config, }, diff --git a/tests.yaml b/tests.yaml index a2fb0bb0f..e97a17d7e 100644 --- a/tests.yaml +++ b/tests.yaml @@ -33,7 +33,7 @@ tests: severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.562% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is only 1.562% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Don't alert when PVC access_mode is ReadOnlyMany @@ -119,7 +119,7 @@ tests: severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.294% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is only 1.294% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - interval: 1m @@ -143,7 +143,7 @@ tests: severity: warning exp_annotations: summary: "PersistentVolume is filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring is expected to fill up within four days. Currently 1.263% is available.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is expected to fill up within four days. Currently 1.263% is available.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - exp_labels: job: kubelet @@ -152,7 +152,7 @@ tests: severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.263% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is only 1.263% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Block volume mounts can report 0 for the kubelet_volume_stats_used_bytes metric but it shouldn't trigger the KubePersistentVolumeFillingUp alert. @@ -228,7 +228,7 @@ tests: severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.562% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster only has 1.562% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Don't alert when PVC access_mode is ReadOnlyMany @@ -314,7 +314,7 @@ tests: severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.294% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster only has 1.294% free inodes.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - interval: 1m @@ -338,7 +338,7 @@ tests: severity: warning exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - exp_labels: job: kubelet @@ -347,7 +347,7 @@ tests: severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.263% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster only has 1.263% free inodes.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Block volume mounts can report 0 for the kubelet_volume_stats_inodes_used metric but it shouldn't trigger the KubePersistentVolumeInodesFillingUp alert. @@ -1137,7 +1137,7 @@ tests: runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping" summary: "Pod is crash looping." 
- eval_time: 20m - alertname: KubePodCrashLooping # alert fired for a period of 5 minutes after resolution because the alert looks back at the last 5 minutes of data and the range vector doesn't take stale samples into account + alertname: KubePodCrashLooping # alert fired for a period of 5 minutes after resolution because the alert looks back at the last 5 minutes of data and the range vector doesn't take stale samples into account exp_alerts: - exp_labels: severity: "warning" From b4594749630b1ec6c25f420ee4e11b9552bd6020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Fri, 24 Nov 2023 14:57:06 +0100 Subject: [PATCH 2/6] Add cluster label to all tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- tests.yaml | 305 ++++++++++++++++++++++++++++------------------------- 1 file changed, 163 insertions(+), 142 deletions(-) diff --git a/tests.yaml b/tests.yaml index e97a17d7e..cdba57eb3 100644 --- a/tests.yaml +++ b/tests.yaml @@ -8,13 +8,13 @@ tests: # PersistentVolume disk space - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -29,23 +29,24 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is only 1.562% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.562% free.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -61,11 +62,11 @@ tests: # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0 0 0 0' alert_rule_test: - eval_time: 1m @@ -80,15 +81,15 @@ tests: # Don't alert when PVC has been labelled as fully utilised - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 
'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' - - series: 'kube_persistentvolumeclaim_labels{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -102,11 +103,11 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' alert_rule_test: - eval_time: 1h @@ -115,22 +116,23 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is only 1.294% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.294% free.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -139,31 +141,33 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: warning exp_annotations: summary: "PersistentVolume is filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is expected to fill up within four days. Currently 1.263% is available.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is expected to fill up within four days. Currently 1.263% is available.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is only 1.263% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.263% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Block volume mounts can report 0 for the kubelet_volume_stats_used_bytes metric but it shouldn't trigger the KubePersistentVolumeFillingUp alert. # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0x61' alert_rule_test: - eval_time: 61m @@ -172,13 +176,13 @@ tests: # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -186,15 +190,15 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' 
- - series: 'kube_persistentvolumeclaim_labels{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -203,13 +207,13 @@ tests: # PersistentVolume inodes - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -224,23 +228,24 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster only has 1.562% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.562% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -256,11 +261,11 @@ tests: # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0 0 0 0' alert_rule_test: - eval_time: 1m @@ -275,15 +280,15 @@ tests: # Don't alert when PVC has been labelled as fully utilised - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 
'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' - - series: 'kube_persistentvolumeclaim_labels{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -297,11 +302,11 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' alert_rule_test: - eval_time: 1h @@ -310,22 +315,23 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster only has 1.294% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.294% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -334,31 +340,33 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: warning exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster only has 1.263% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.263% free inodes.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Block volume mounts can report 0 for the kubelet_volume_stats_inodes_used metric but it shouldn't trigger the KubePersistentVolumeInodesFillingUp alert. # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0x61' alert_rule_test: - eval_time: 61m @@ -367,13 +375,13 @@ tests: # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -381,15 +389,15 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' - - series: 
'kube_persistentvolumeclaim_labels{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -560,7 +568,7 @@ tests: - interval: 1m input_series: - - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",instance="10.0.2.15:10250",job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1' alert_rule_test: - eval_time: 18m @@ -681,120 +689,120 @@ tests: - interval: 1m input_series: - - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '0+3x5' - - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '0+3x5' # Duplicate timeseries from different instances. 
- - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate exp_samples: - value: 5.0e-2 - labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}' + labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="kubernetes",namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}' - interval: 1m input_series: - - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. 
- - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_working_set_bytes exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 'node_namespace_pod_container:container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m input_series: - - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 
'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_rss exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 'node_namespace_pod_container:container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m input_series: - - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 
'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_cache exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 'node_namespace_pod_container:container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m input_series: - - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 
'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_swap exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 'node_namespace_pod_container:container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m # Current unequal desired and not progressing. 
input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 32m @@ -805,6 +813,7 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring + cluster: kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -816,7 +825,7 @@ tests: # KubeDeploymentRolloutStuck - interval: 1m input_series: - - series: 'kube_deployment_status_condition{job="kube-state-metrics",namespace="monitoring",deployment="stuck", condition="Progressing", status="false"}' + - series: 'kube_deployment_status_condition{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",deployment="stuck", condition="Progressing", status="false"}' values: '1+0x17 0+0x5' alert_rule_test: - eval_time: 14m @@ -827,6 +836,7 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring + cluster: kubernetes deployment: stuck severity: warning condition: Progressing @@ -840,15 +850,15 @@ tests: - interval: 1m # Misscheduled is non zero. 
input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 32m @@ -859,6 +869,7 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring + cluster: kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -870,15 +881,15 @@ tests: - interval: 1m # Updated number unequal desired. 
input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 32m @@ -889,6 +900,7 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring + cluster: kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -900,15 +912,15 @@ tests: - interval: 1m # Number available unequal desired. 
input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 34m @@ -919,6 +931,7 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring + cluster: kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -930,7 +943,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_certificate_manager_client_ttl_seconds{job="kubelet",namespace="monitoring",node="minikube"}' + - series: 'kubelet_certificate_manager_client_ttl_seconds{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' values: '86400-60x1' alert_rule_test: - eval_time: 0m @@ -939,6 +952,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -951,6 +965,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -960,6 +975,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: critical exp_annotations: @@ -969,7 +985,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_certificate_manager_server_ttl_seconds{job="kubelet",namespace="monitoring",node="minikube"}' + - series: 'kubelet_certificate_manager_server_ttl_seconds{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' values: '86400-60x1' alert_rule_test: - eval_time: 0m @@ -978,6 +994,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -990,6 +1007,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -999,6 
+1017,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: critical exp_annotations: @@ -1008,7 +1027,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_certificate_manager_client_expiration_renew_errors{job="kubelet",namespace="monitoring",node="minikube"}' + - series: 'kubelet_certificate_manager_client_expiration_renew_errors{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' values: '0+1x20' alert_rule_test: - eval_time: 16m @@ -1017,6 +1036,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -1027,7 +1047,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_server_expiration_renew_errors{job="kubelet",namespace="monitoring",node="minikube"}' + - series: 'kubelet_server_expiration_renew_errors{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' values: '0+1x20' alert_rule_test: - eval_time: 16m @@ -1036,6 +1056,7 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -1162,9 +1183,9 @@ tests: values: '100x10' - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '50x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1179,9 +1200,9 @@ tests: values: '200x10' - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '200x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1205,9 +1226,9 @@ tests: values: '100x10' - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '50x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1222,9 +1243,9 @@ tests: values: '500x10' - series: 
'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '500x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' values: '10x10' - - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' values: '10x10' alert_rule_test: - eval_time: 4m From 6855a076faf8855ad3b780eb551b5e06166da51f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Fri, 24 Nov 2023 15:05:08 +0100 Subject: [PATCH 3/6] Add cluster label to all all tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- tests.yaml | 179 +++++++++++++++++++++++++++-------------------------- 1 file changed, 90 insertions(+), 89 deletions(-) diff --git a/tests.yaml b/tests.yaml index cdba57eb3..f874af689 100644 --- a/tests.yaml +++ b/tests.yaml @@ -405,19 +405,19 @@ tests: - interval: 1m input_series: - - series: 'kube_node_status_capacity{resource="pods",instance="172.17.0.5:8443",node="minikube",job="kube-state-metrics", namespace="kube-system"}' + - series: 'kube_node_status_capacity{resource="pods",instance="172.17.0.5:8443",cluster="kubernetes",node="minikube",job="kube-state-metrics",namespace="kube-system"}' values: '3+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-1",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-1",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-1",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-1",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-2",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-2",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-2",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-2",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-3",service="kube-state-metrics"}' + - series: 
'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-3",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-3",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-3",service="kube-state-metrics"}' values: '1+0x15' alert_rule_test: - eval_time: 10m @@ -435,93 +435,93 @@ tests: - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 
'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1 stale' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1+0x10' promql_expr_test: - eval_time: 0m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 
'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' values: '1 stale' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm",cluster="test"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm",cluster="test"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' values: '1+0x10' promql_expr_test: - eval_time: 0m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}' - eval_time: 0m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}' - eval_time: 1m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}' - eval_time: 1m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}' - interval: 1m input_series: @@ -550,7 +550,7 @@ tests: values: '1+0x10' - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", instance="10.0.2.15:10250"}' values: '30+1x10' - - series: 'kubelet_node_name{endpoint="https-metrics",instance="10.0.2.15:10250",job="kubelet",namespace="kube-system",node="minikube",service="kubelet"}' + - series: 
'kubelet_node_name{endpoint="https-metrics",instance="10.0.2.15:10250",job="kubelet",cluster="kubernetes",namespace="kube-system",node="minikube",service="kubelet"}' values: '1 1 1 1 1 1 1 1 1 1' alert_rule_test: - eval_time: 10m @@ -575,6 +575,7 @@ tests: alertname: KubeNodeReadinessFlapping exp_alerts: - exp_labels: + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -585,27 +586,27 @@ tests: # Verify that node:node_num_cpu:sum triggers no many-to-many errors. - interval: 1m input_series: - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'node_cpu_seconds_total{cpu="1",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="1",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-1",pod="node-exporter-1",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-1",pod="node-exporter-1",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 1' - - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-1",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-1",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 stale' - - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-2",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-2",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 1' promql_expr_test: - eval_time: 0m expr: node:node_num_cpu:sum exp_samples: - value: 2 - labels: 'node:node_num_cpu:sum{node="node-1"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' - eval_time: 1m expr: node:node_num_cpu:sum exp_samples: - value: 2 - labels: 'node:node_num_cpu:sum{node="node-1"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' # Verify that node:node_num_cpu:sum doesn't trigger many-to-many errors when # node_namespace_pod:kube_pod_info: has duplicate entries for the same @@ -613,66 +614,66 @@ tests: # it didn't add stale markers to the "old" series on shutdown. 
- interval: 1m input_series: - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance2",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-2",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance2",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-2",service="node-exporter"}' values: '1 1' - - series: 'node_namespace_pod:kube_pod_info:{node="node-1",namespace="openshift-monitoring",pod="node-exporter-1"}' + - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-1",namespace="openshift-monitoring",pod="node-exporter-1"}' values: '1 1' - - series: 'node_namespace_pod:kube_pod_info:{node="node-2",namespace="openshift-monitoring",pod="node-exporter-2"}' + - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-2",namespace="openshift-monitoring",pod="node-exporter-2"}' values: '1 1' # series for the "old" prometheus instance. - - series: 'node_namespace_pod:kube_pod_info:{node="node-1",namespace="openshift-monitoring",pod="prometheus-0"}' + - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-1",namespace="openshift-monitoring",pod="prometheus-0"}' values: '1' # series for the "new" prometheus instance. - - series: 'node_namespace_pod:kube_pod_info:{node="node-2",namespace="openshift-monitoring",pod="prometheus-0"}' + - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-2",namespace="openshift-monitoring",pod="prometheus-0"}' values: 'stale 1' promql_expr_test: - eval_time: 0m expr: node:node_num_cpu:sum exp_samples: - value: 1 - labels: 'node:node_num_cpu:sum{node="node-1"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' - value: 1 - labels: 'node:node_num_cpu:sum{node="node-2"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}' - eval_time: 1m expr: node:node_num_cpu:sum exp_samples: - value: 1 - labels: 'node:node_num_cpu:sum{node="node-1"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' - value: 1 - labels: 'node:node_num_cpu:sum{node="node-2"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}' - interval: 1m input_series: - - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' values: '1 1' - - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 
'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' values: '1 stale' - - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' + - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' values: '1 1' - - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' + - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' values: '1 stale' promql_expr_test: - eval_time: 0m expr: namespace_workload_pod:kube_pod_owner:relabel exp_samples: - value: 1 - labels: 'namespace_workload_pod:kube_pod_owner:relabel{namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' + labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' - eval_time: 1m expr: namespace_workload_pod:kube_pod_owner:relabel exp_samples: - value: 1 - labels: 'namespace_workload_pod:kube_pod_owner:relabel{namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' + labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' - interval: 1m input_series: - - series: 'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' values: '1+0x20' - - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' values: '1+0x20' - - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' values: '1+0x20' 
alert_rule_test: - eval_time: 15m @@ -706,7 +707,7 @@ tests: expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate exp_samples: - value: 5.0e-2 - labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="kubernetes",namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}' + labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="kubernetes",namespace="monitoring", pod="alertmanager-main-0", container="alertmanager",node="node1"}' - interval: 1m input_series: @@ -1066,7 +1067,7 @@ tests: - interval: 1m input_series: - - series: 'kube_job_failed{instance="instance1",condition="true",job="kube-state-metrics",job_name="job-1597623120",namespace="ns1"}' + - series: 'kube_job_failed{instance="instance1",condition="true",job="kube-state-metrics",job_name="job-1597623120",cluster="kubernetes",namespace="ns1"}' values: '1+0x20' alert_rule_test: - eval_time: 15m @@ -1086,9 +1087,9 @@ tests: - interval: 1m input_series: - - series: 'kube_job_status_start_time{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_start_time{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '0+0x200 _x500 0+0x40' - - series: 'kube_job_status_active{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_active{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '1x200 _x500 1x40' alert_rule_test: - eval_time: 6h @@ -1107,9 +1108,9 @@ tests: - interval: 1m input_series: - - series: 'kube_job_status_start_time{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_start_time{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '0+0x740' - - series: 'kube_job_status_active{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_active{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '1+0x710 0x30' alert_rule_test: - eval_time: 6h @@ -1138,7 +1139,7 @@ tests: - interval: 1m input_series: - - series: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",namespace="test",pod="static-web",container="script",job="kube-state-metrics"}' + - series: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",cluster="kubernetes",namespace="test",pod="static-web",container="script",job="kube-state-metrics"}' values: '1 1 stale _x3 1 1 stale _x2 1+0x4 stale' alert_rule_test: - eval_time: 10m # alert hasn't fired @@ -1177,15 +1178,15 @@ tests: # When ResourceQuota has both cpu and requests.cpu, min value of those will be taken into account for quota calculation. 
- interval: 1m input_series: - - series: 'kube_resourcequota{namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '50x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="cpu", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="cpu", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1194,15 +1195,15 @@ tests: alertname: KubeCPUQuotaOvercommit - interval: 1m input_series: - - series: 'kube_resourcequota{namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '200x10' - - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '200x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="cpu", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="cpu", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1220,15 +1221,15 @@ tests: # When ResourceQuota has both memory and requests.memory, min value of those will be taken into account for quota calculation. 
- interval: 1m input_series: - - series: 'kube_resourcequota{namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '50x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="memory", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="memory", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1237,15 +1238,15 @@ tests: alertname: KubeMemoryQuotaOvercommit - interval: 1m input_series: - - series: 'kube_resourcequota{namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '500x10' - - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '500x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="memory", job="kube-state-metrics"}' values: '10x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="memory", job="kube-state-metrics"}' values: '10x10' alert_rule_test: - eval_time: 4m From 0667c69707357484fc2a8b72da77f2e5152816ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Fri, 24 Nov 2023 15:10:00 +0100 Subject: [PATCH 4/6] fix tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- tests.yaml | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests.yaml b/tests.yaml index f874af689..98ced295c 100644 --- a/tests.yaml +++ b/tests.yaml @@ -480,48 +480,48 @@ 
tests: - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
-    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}'
+    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}'
       values: '1 stale'
-    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm",cluster="test"}'
+    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}'
       values: '1+0x10'
-    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm",cluster="test"}'
+    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}'
       values: '1+0x10'
-    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}'
+    - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}'
       values: '1+0x10'
   promql_expr_test:
     - eval_time: 0m
       expr: namespace_cpu:kube_pod_container_resource_requests:sum
       exp_samples:
         - value: 0.15
-          labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}'
+          labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
     - eval_time: 0m
       expr: namespace_memory:kube_pod_container_resource_requests:sum
       exp_samples:
         - value: 1.0e+9
-          labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}'
+          labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
     - eval_time: 1m
       expr: namespace_cpu:kube_pod_container_resource_requests:sum
       exp_samples:
         - value: 0.15
-          labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}'
+          labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'
     - eval_time: 1m
       expr: namespace_memory:kube_pod_container_resource_requests:sum
       exp_samples:
         - value: 1.0e+9
-          labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver",cluster="test"}'
+          labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}'

 - interval: 1m
   input_series:
@@ -680,6 +680,7 @@ tests:
       alertname: KubePodNotReady
       exp_alerts:
         - exp_labels:
+            cluster: kubernetes
             namespace: ns1
             pod: pod-ds-7cc77d965f-cgsdv
             severity: warning
@@ -1098,6 +1099,7 @@ tests:
      alertname: KubeJobNotCompleted
       exp_alerts:
         - exp_labels:
+            cluster: "kubernetes"
             namespace: ns1
             job_name: job1
             severity: warning
@@ -1151,6 +1153,7 @@ tests:
             severity: "warning"
             container: "script"
             job: "kube-state-metrics"
+            cluster: "kubernetes"
cluster: "kubernetes" namespace: "test" pod: "static-web" reason: "CrashLoopBackOff" From 4e71813240a36bc8a756fc8200362c7622d2f9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Fri, 24 Nov 2023 15:18:13 +0100 Subject: [PATCH 5/6] fix tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- tests.yaml | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests.yaml b/tests.yaml index 98ced295c..2a9d292e0 100644 --- a/tests.yaml +++ b/tests.yaml @@ -526,37 +526,38 @@ tests: - interval: 1m input_series: # Create a histogram where all of the last 10 samples are in the +Inf (> 10 seconds) bucket. - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.005", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.005", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.01", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.01", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.025", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.025", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.05", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.05", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.1", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.1", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.25", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.25", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.5", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.5", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="1", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="1", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="2.5", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="2.5", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="5", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="5", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="10", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="10", 
cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '30+1x10' - - series: 'kubelet_node_name{endpoint="https-metrics",instance="10.0.2.15:10250",job="kubelet",cluster="kubernetes",namespace="kube-system",node="minikube",service="kubelet"}' + - series: 'kubelet_node_name{endpoint="https-metrics",cluster="kubernetes",instance="10.0.2.15:10250",job="kubelet",,namespace="kube-system",node="minikube",service="kubelet"}' values: '1 1 1 1 1 1 1 1 1 1' alert_rule_test: - eval_time: 10m alertname: KubeletPlegDurationHigh exp_alerts: - exp_labels: + cluster: "kubernetes" instance: 10.0.2.15:10250 node: minikube quantile: 0.99 @@ -568,7 +569,7 @@ tests: - interval: 1m input_series: - - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",instance="10.0.2.15:10250",job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",,namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1' alert_rule_test: - eval_time: 18m @@ -1075,6 +1076,7 @@ tests: alertname: KubeJobFailed exp_alerts: - exp_labels: + cluster: "kubernetes" namespace: ns1 job_name: job-1597623120 severity: warning @@ -1168,6 +1170,7 @@ tests: severity: "warning" container: "script" job: "kube-state-metrics" + cluster: "kubernetes" namespace: "test" pod: "static-web" reason: "CrashLoopBackOff" From 6212c1e145e492b98cac3edfb92e8ef11b26112d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Fri, 24 Nov 2023 17:53:51 +0100 Subject: [PATCH 6/6] fix tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- tests.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests.yaml b/tests.yaml index 2a9d292e0..f1893b63c 100644 --- a/tests.yaml +++ b/tests.yaml @@ -426,6 +426,7 @@ tests: alertname: KubeletTooManyPods exp_alerts: - exp_labels: + cluster: kubernetes node: minikube severity: info exp_annotations: @@ -550,7 +551,7 @@ tests: values: '1+0x10' - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '30+1x10' - - series: 'kubelet_node_name{endpoint="https-metrics",cluster="kubernetes",instance="10.0.2.15:10250",job="kubelet",,namespace="kube-system",node="minikube",service="kubelet"}' + - series: 'kubelet_node_name{endpoint="https-metrics",cluster="kubernetes",instance="10.0.2.15:10250",job="kubelet",namespace="kube-system",node="minikube",service="kubelet"}' values: '1 1 1 1 1 1 1 1 1 1' alert_rule_test: - eval_time: 10m @@ -569,7 +570,7 @@ tests: - interval: 1m input_series: - - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",,namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + - series: 
+    - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}'
       values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1'
   alert_rule_test:
     - eval_time: 18m