diff --git a/alerts/storage_alerts.libsonnet b/alerts/storage_alerts.libsonnet index ce028683f..ebeed2b85 100644 --- a/alerts/storage_alerts.libsonnet +++ b/alerts/storage_alerts.libsonnet @@ -26,9 +26,9 @@ ) < 0.03 and kubelet_volume_stats_used_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1m', @@ -36,7 +36,7 @@ severity: 'critical', }, annotations: { - description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.', + description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} is only {{ $value | humanizePercentage }} free.' % $._config, summary: 'PersistentVolume is filling up.', }, }, @@ -52,9 +52,9 @@ kubelet_volume_stats_used_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 and predict_linear(kubelet_volume_stats_available_bytes{%(prefixedNamespaceSelector)s%(kubeletSelector)s}[%(volumeFullPredictionSampleTime)s], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1h', @@ -62,7 +62,7 @@ severity: 'warning', }, annotations: { - description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.', + description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.' 
% $._config, summary: 'PersistentVolume is filling up.', }, }, @@ -76,9 +76,9 @@ ) < 0.03 and kubelet_volume_stats_inodes_used{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1m', @@ -86,7 +86,7 @@ severity: 'critical', }, annotations: { - description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes.', + description: 'The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} only has {{ $value | humanizePercentage }} free inodes.' % $._config, summary: 'PersistentVolumeInodes are filling up.', }, }, @@ -102,9 +102,9 @@ kubelet_volume_stats_inodes_used{%(prefixedNamespaceSelector)s%(kubeletSelector)s} > 0 and predict_linear(kubelet_volume_stats_inodes_free{%(prefixedNamespaceSelector)s%(kubeletSelector)s}[%(volumeFullPredictionSampleTime)s], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{%(prefixedNamespaceSelector)s access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(%(clusterLabel)s, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{%(prefixedNamespaceSelector)s%(pvExcludedSelector)s} == 1 ||| % $._config, 'for': '1h', @@ -112,7 +112,7 @@ severity: 'warning', }, annotations: { - description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.', + description: 'Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.%(clusterLabel)s }} is expected to run out of inodes within four days. Currently {{ $value | humanizePercentage }} of its inodes are free.' % $._config, summary: 'PersistentVolumeInodes are filling up.', }, }, @@ -126,7 +126,7 @@ severity: 'critical', }, annotations: { - description: 'The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.', + description: 'The persistent volume {{ $labels.persistentvolume }} on Cluster {{ $labels.%(clusterLabel)s }} has status {{ $labels.phase }}.' 
% $._config, summary: 'PersistentVolume is having issues with provisioning.', }, }, diff --git a/rules/node.libsonnet b/rules/node.libsonnet index e4e949bd7..453e3720b 100644 --- a/rules/node.libsonnet +++ b/rules/node.libsonnet @@ -32,8 +32,8 @@ expr: ||| count by (%(clusterLabel)s, node) ( node_cpu_seconds_total{mode="idle",%(nodeExporterSelector)s} - * on (namespace, %(podLabel)s) group_left(node) - topk by(namespace, %(podLabel)s) (1, node_namespace_pod:kube_pod_info:) + * on (%(clusterLabel)s, namespace, %(podLabel)s) group_left(node) + topk by(%(clusterLabel)s, namespace, %(podLabel)s) (1, node_namespace_pod:kube_pod_info:) ) ||| % $._config, }, diff --git a/tests.yaml b/tests.yaml index a2fb0bb0f..f1893b63c 100644 --- a/tests.yaml +++ b/tests.yaml @@ -8,13 +8,13 @@ tests: # PersistentVolume disk space - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -29,23 +29,24 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.562% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.562% free.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -61,11 +62,11 @@ tests: # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0 0 0 0' alert_rule_test: - eval_time: 1m @@ -80,15 +81,15 @@ tests: # Don't alert when PVC has been labelled as fully utilised - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 
'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' - - series: 'kube_persistentvolumeclaim_labels{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -102,11 +103,11 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' alert_rule_test: - eval_time: 1h @@ -115,22 +116,23 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.294% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.294% free.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -139,31 +141,33 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: warning exp_annotations: summary: "PersistentVolume is filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring is expected to fill up within four days. Currently 1.263% is available.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is expected to fill up within four days. Currently 1.263% is available.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.263% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.263% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Block volume mounts can report 0 for the kubelet_volume_stats_used_bytes metric but it shouldn't trigger the KubePersistentVolumeFillingUp alert. # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0x61' alert_rule_test: - eval_time: 61m @@ -172,13 +176,13 @@ tests: # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -186,15 +190,15 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' 
- - series: 'kube_persistentvolumeclaim_labels{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -203,13 +207,13 @@ tests: # PersistentVolume inodes - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -224,23 +228,24 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.562% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.562% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -256,11 +261,11 @@ tests: # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0 0 0 0' alert_rule_test: - eval_time: 1m @@ -275,15 +280,15 @@ tests: # Don't alert when PVC has been labelled as fully utilised - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 
'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' - - series: 'kube_persistentvolumeclaim_labels{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -297,11 +302,11 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' alert_rule_test: - eval_time: 1h @@ -310,22 +315,23 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.294% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.294% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -334,31 +340,33 @@ tests: - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: warning exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - exp_labels: job: kubelet namespace: monitoring + cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.263% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.263% free inodes.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Block volume mounts can report 0 for the kubelet_volume_stats_inodes_used metric but it shouldn't trigger the KubePersistentVolumeInodesFillingUp alert. # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0x61' alert_rule_test: - eval_time: 61m @@ -367,13 +375,13 @@ tests: # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -381,15 +389,15 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' - - series: 
'kube_persistentvolumeclaim_labels{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -397,19 +405,19 @@ tests: - interval: 1m input_series: - - series: 'kube_node_status_capacity{resource="pods",instance="172.17.0.5:8443",node="minikube",job="kube-state-metrics", namespace="kube-system"}' + - series: 'kube_node_status_capacity{resource="pods",instance="172.17.0.5:8443",cluster="kubernetes",node="minikube",job="kube-state-metrics",namespace="kube-system"}' values: '3+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-1",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-1",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-1",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-1",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-2",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-2",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-2",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-2",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-3",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-3",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-3",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-3",service="kube-state-metrics"}' values: '1+0x15' alert_rule_test: - eval_time: 10m @@ -418,6 +426,7 @@ tests: alertname: KubeletTooManyPods exp_alerts: - exp_labels: + cluster: kubernetes node: minikube severity: info exp_annotations: @@ -427,128 +436,129 @@ tests: - interval: 1m input_series: - - series: 
'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1 stale' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1+0x10' promql_expr_test: - eval_time: 0m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 
'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1 stale' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1+0x10' promql_expr_test: - eval_time: 0m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_cpu:kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{namespace="kube-apiserver",cluster="test"}' + labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: # Create a histogram where all of the last 10 samples are in the +Inf (> 10 seconds) bucket. 
- - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.005", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.005", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.01", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.01", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.025", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.025", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.05", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.05", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.1", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.1", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.25", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.25", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.5", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.5", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="1", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="1", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="2.5", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="2.5", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="5", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="5", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="10", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="10", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", cluster="kubernetes",instance="10.0.2.15:10250"}' values: '30+1x10' - - series: 'kubelet_node_name{endpoint="https-metrics",instance="10.0.2.15:10250",job="kubelet",namespace="kube-system",node="minikube",service="kubelet"}' + - series: 'kubelet_node_name{endpoint="https-metrics",cluster="kubernetes",instance="10.0.2.15:10250",job="kubelet",namespace="kube-system",node="minikube",service="kubelet"}' values: '1 1 1 1 1 1 1 1 1 1' alert_rule_test: - eval_time: 10m alertname: 
KubeletPlegDurationHigh exp_alerts: - exp_labels: + cluster: "kubernetes" instance: 10.0.2.15:10250 node: minikube quantile: 0.99 @@ -560,13 +570,14 @@ tests: - interval: 1m input_series: - - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1' alert_rule_test: - eval_time: 18m alertname: KubeNodeReadinessFlapping exp_alerts: - exp_labels: + cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -577,27 +588,27 @@ tests: # Verify that node:node_num_cpu:sum triggers no many-to-many errors. - interval: 1m input_series: - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'node_cpu_seconds_total{cpu="1",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="1",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-1",pod="node-exporter-1",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-1",pod="node-exporter-1",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 1' - - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-1",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-1",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 stale' - - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-2",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-2",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 1' promql_expr_test: - eval_time: 0m expr: node:node_num_cpu:sum exp_samples: - value: 2 - labels: 'node:node_num_cpu:sum{node="node-1"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' - eval_time: 1m expr: node:node_num_cpu:sum exp_samples: - value: 2 - labels: 'node:node_num_cpu:sum{node="node-1"}' + labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' # Verify that node:node_num_cpu:sum doesn't trigger many-to-many errors when # node_namespace_pod:kube_pod_info: has duplicate entries for the same @@ -605,72 +616,73 @@ tests: # it didn't add stale markers to the "old" series on 
 # Verify that node:node_num_cpu:sum doesn't trigger many-to-many errors when
 # node_namespace_pod:kube_pod_info: has duplicate entries for the same
@@ -605,72 +616,73 @@ tests:
 # it didn't add stale markers to the "old" series on shutdown.
 - interval: 1m
   input_series:
-  - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}'
+  - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}'
     values: '1 1'
-  - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance2",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-2",service="node-exporter"}'
+  - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance2",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-2",service="node-exporter"}'
     values: '1 1'
-  - series: 'node_namespace_pod:kube_pod_info:{node="node-1",namespace="openshift-monitoring",pod="node-exporter-1"}'
+  - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-1",namespace="openshift-monitoring",pod="node-exporter-1"}'
     values: '1 1'
-  - series: 'node_namespace_pod:kube_pod_info:{node="node-2",namespace="openshift-monitoring",pod="node-exporter-2"}'
+  - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-2",namespace="openshift-monitoring",pod="node-exporter-2"}'
     values: '1 1'
   # series for the "old" prometheus instance.
-  - series: 'node_namespace_pod:kube_pod_info:{node="node-1",namespace="openshift-monitoring",pod="prometheus-0"}'
+  - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-1",namespace="openshift-monitoring",pod="prometheus-0"}'
     values: '1'
   # series for the "new" prometheus instance.
-  - series: 'node_namespace_pod:kube_pod_info:{node="node-2",namespace="openshift-monitoring",pod="prometheus-0"}'
+  - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-2",namespace="openshift-monitoring",pod="prometheus-0"}'
     values: 'stale 1'
   promql_expr_test:
   - eval_time: 0m
     expr: node:node_num_cpu:sum
     exp_samples:
     - value: 1
-      labels: 'node:node_num_cpu:sum{node="node-1"}'
+      labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}'
     - value: 1
-      labels: 'node:node_num_cpu:sum{node="node-2"}'
+      labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}'
   - eval_time: 1m
     expr: node:node_num_cpu:sum
     exp_samples:
     - value: 1
-      labels: 'node:node_num_cpu:sum{node="node-1"}'
+      labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}'
     - value: 1
-      labels: 'node:node_num_cpu:sum{node="node-2"}'
+      labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}'
 - interval: 1m
   input_series:
-  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
     values: '1 1'
-  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
     values: '1 stale'
-  - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
+  - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
     values: '1 1'
-  - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
+  - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
     values: '1 stale'
   promql_expr_test:
   - eval_time: 0m
     expr: namespace_workload_pod:kube_pod_owner:relabel
     exp_samples:
     - value: 1
-      labels: 'namespace_workload_pod:kube_pod_owner:relabel{namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
   - eval_time: 1m
     expr: namespace_workload_pod:kube_pod_owner:relabel
     exp_samples:
     - value: 1
-      labels: 'namespace_workload_pod:kube_pod_owner:relabel{namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
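The workload="ds", workload_type="deployment" labels asserted above come from walking ownership one level up (Pod to ReplicaSet to Deployment); the static workload_type label is attached by the rule itself. A sketch of namespace_workload_pod:kube_pod_owner:relabel under the same upstream assumption:

    # Sketch only. topk by replicaset keeps the duplicate instance1/instance2
    # kube_replicaset_owner series from producing a many-to-many match,
    # which is exactly what the stale-marker values in this fixture probe.
    max by (cluster, namespace, workload, pod) (
      label_replace(
        label_replace(
          kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
          "replicaset", "$1", "owner_name", "(.*)"
        )
        * on (cluster, replicaset, namespace) group_left (owner_name)
          topk by (cluster, replicaset, namespace) (
            1, max by (cluster, replicaset, namespace, owner_name) (
              kube_replicaset_owner{job="kube-state-metrics"}
            )
          ),
        "workload", "$1", "owner_name", "(.*)"
      )
    )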
 - interval: 1m
   input_series:
-  - series: 'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'
+  - series: 'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'
     values: '1+0x20'
-  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'
     values: '1+0x20'
-  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'
     values: '1+0x20'
   alert_rule_test:
   - eval_time: 15m
     alertname: KubePodNotReady
     exp_alerts:
     - exp_labels:
+        cluster: kubernetes
         namespace: ns1
         pod: pod-ds-7cc77d965f-cgsdv
         severity: warning
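This fixture deliberately gives the pod two kube_pod_owner series, one with owner_kind="" and one with owner_kind="ReplicaSet", so KubePodNotReady must deduplicate owners before joining. Roughly, assuming the upstream expression:

    # Sketch only; phases and selectors are templated upstream.
    sum by (cluster, namespace, pod) (
      max by (cluster, namespace, pod) (
        kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown|Failed"}
      )
      * on (cluster, namespace, pod) group_left (owner_kind)
        topk by (cluster, namespace, pod) (
          1, max by (cluster, namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
        )
    ) > 0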
@@ -681,120 +693,120 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
+  - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
     values: '0+3x5'
-  - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
+  - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
     values: '0+3x5'
   # Duplicate timeseries from different instances.
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
     values: '1+0x5'
   # Missing node label.
-  - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
   promql_expr_test:
   - eval_time: 5m
     expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
     exp_samples:
     - value: 5.0e-2
-      labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}'
+      labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="kubernetes",namespace="monitoring", pod="alertmanager-main-0", container="alertmanager",node="node1"}'
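The expected value 5.0e-2 is simply the counter slope: the series grows by 3 per 1m sample, and irate of that is 3/60 = 0.05. alertmanager-main-1 produces no sample because its kube_pod_info row has no node label, and the duplicate instance1/instance2 rows collapse to one before the join. A sketch of the recording rule, assuming the upstream definition:

    sum by (cluster, namespace, pod, container) (
      irate(container_cpu_usage_seconds_total{job="cadvisor", image!=""}[5m])
    )
    * on (cluster, namespace, pod) group_left (node)
      topk by (cluster, namespace, pod) (
        1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )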
 - interval: 1m
   input_series:
-  - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
+  - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
     values: '1000+0x5'
-  - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
+  - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
     values: '1000+0x5'
   # Duplicate timeseries from different instances.
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
     values: '1+0x5'
   # Missing node label.
-  - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
   promql_expr_test:
   - eval_time: 5m
     expr: node_namespace_pod_container:container_memory_working_set_bytes
     exp_samples:
     - value: 1.0e+3
-      labels: 'node_namespace_pod_container:container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
+      labels: 'node_namespace_pod_container:container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
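The next three fixtures repeat the identical scenario for container_memory_rss, container_memory_cache and container_memory_swap. The corresponding recording rules presumably all share one shape; in placeholder notation (not literal PromQL, <metric> ranges over working_set_bytes, rss, cache and swap):

    container_memory_<metric>{job="cadvisor", image!=""}
    * on (cluster, namespace, pod) group_left (node)
      topk by (cluster, namespace, pod) (
        1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=""})
      )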
 - interval: 1m
   input_series:
-  - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
+  - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
     values: '1000+0x5'
-  - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
+  - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
     values: '1000+0x5'
   # Duplicate timeseries from different instances.
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
     values: '1+0x5'
   # Missing node label.
-  - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
   promql_expr_test:
   - eval_time: 5m
     expr: node_namespace_pod_container:container_memory_rss
     exp_samples:
     - value: 1.0e+3
-      labels: 'node_namespace_pod_container:container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
+      labels: 'node_namespace_pod_container:container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
 - interval: 1m
   input_series:
-  - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
+  - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
     values: '1000+0x5'
-  - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
+  - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
     values: '1000+0x5'
   # Duplicate timeseries from different instances.
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
     values: '1+0x5'
   # Missing node label.
-  - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
   promql_expr_test:
   - eval_time: 5m
     expr: node_namespace_pod_container:container_memory_cache
     exp_samples:
     - value: 1.0e+3
-      labels: 'node_namespace_pod_container:container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
+      labels: 'node_namespace_pod_container:container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
 - interval: 1m
   input_series:
-  - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
+  - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}'
     values: '1000+0x5'
-  - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
+  - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}'
     values: '1000+0x5'
   # Duplicate timeseries from different instances.
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
-  - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}'
     values: '1+0x5'
   # Missing node label.
-  - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
+  - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}'
     values: '1+0x5'
   promql_expr_test:
   - eval_time: 5m
     expr: node_namespace_pod_container:container_memory_swap
     exp_samples:
     - value: 1.0e+3
-      labels: 'node_namespace_pod_container:container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
+      labels: 'node_namespace_pod_container:container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}'
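The four DaemonSet fixtures below each violate exactly one clause of KubeDaemonSetRolloutStuck while the updated count stays flat, which is what keeps the alert pending until the final values recover. A sketch of the alert expression, assuming the upstream definition:

    # Sketch only; one != clause per test variant below.
    (
      kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"}
        != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
      or kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} != 0
      or kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics"}
        != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
      or kube_daemonset_status_number_available{job="kube-state-metrics"}
        != kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
    ) and (
      changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics"}[5m]) == 0
    )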
 - interval: 1m
   # Current unequal desired and not progressing.
   input_series:
-  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4'
-  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
-  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4'
-  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4'
   alert_rule_test:
   - eval_time: 32m
@@ -805,6 +817,7 @@ tests:
     - exp_labels:
         job: kube-state-metrics
         namespace: monitoring
+        cluster: kubernetes
         daemonset: node-exporter
         severity: warning
       exp_annotations:
@@ -816,7 +829,7 @@ tests:
 # KubeDeploymentRolloutStuck
 - interval: 1m
   input_series:
-  - series: 'kube_deployment_status_condition{job="kube-state-metrics",namespace="monitoring",deployment="stuck", condition="Progressing", status="false"}'
+  - series: 'kube_deployment_status_condition{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",deployment="stuck", condition="Progressing", status="false"}'
     values: '1+0x17 0+0x5'
   alert_rule_test:
   - eval_time: 14m
@@ -827,6 +840,7 @@ tests:
     - exp_labels:
         job: kube-state-metrics
         namespace: monitoring
+        cluster: kubernetes
         deployment: stuck
         severity: warning
         condition: Progressing
@@ -840,15 +854,15 @@ tests:
 - interval: 1m
   # Misscheduled is non zero.
   input_series:
-  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4'
-  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0'
-  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4'
-  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4'
   alert_rule_test:
   - eval_time: 32m
@@ -859,6 +873,7 @@ tests:
    - exp_labels:
        job: kube-state-metrics
        namespace: monitoring
+       cluster: kubernetes
        daemonset: node-exporter
        severity: warning
      exp_annotations:
@@ -870,15 +885,15 @@ tests:
 - interval: 1m
   # Updated number unequal desired.
   input_series:
-  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
-  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4'
-  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4'
   alert_rule_test:
   - eval_time: 32m
@@ -889,6 +904,7 @@ tests:
    - exp_labels:
        job: kube-state-metrics
        namespace: monitoring
+       cluster: kubernetes
        daemonset: node-exporter
        severity: warning
      exp_annotations:
@@ -900,15 +916,15 @@ tests:
 - interval: 1m
   # Number available unequal desired.
   input_series:
-  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
-  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4'
-  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}'
+  - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}'
     values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4'
   alert_rule_test:
   - eval_time: 34m
@@ -919,6 +935,7 @@ tests:
    - exp_labels:
        job: kube-state-metrics
        namespace: monitoring
+       cluster: kubernetes
        daemonset: node-exporter
        severity: warning
      exp_annotations:
@@ -930,7 +947,7 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kubelet_certificate_manager_client_ttl_seconds{job="kubelet",namespace="monitoring",node="minikube"}'
+  - series: 'kubelet_certificate_manager_client_ttl_seconds{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}'
     values: '86400-60x1'
   alert_rule_test:
   - eval_time: 0m
@@ -939,6 +956,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: warning
      exp_annotations:
@@ -951,6 +969,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: warning
      exp_annotations:
@@ -960,6 +979,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: critical
      exp_annotations:
@@ -969,7 +989,7 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kubelet_certificate_manager_server_ttl_seconds{job="kubelet",namespace="monitoring",node="minikube"}'
+  - series: 'kubelet_certificate_manager_server_ttl_seconds{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}'
     values: '86400-60x1'
   alert_rule_test:
  - eval_time: 0m
@@ -978,6 +998,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: warning
      exp_annotations:
@@ -990,6 +1011,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: warning
      exp_annotations:
@@ -999,6 +1021,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: critical
      exp_annotations:
@@ -1008,7 +1031,7 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kubelet_certificate_manager_client_expiration_renew_errors{job="kubelet",namespace="monitoring",node="minikube"}'
+  - series: 'kubelet_certificate_manager_client_expiration_renew_errors{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}'
     values: '0+1x20'
   alert_rule_test:
   - eval_time: 16m
@@ -1017,6 +1040,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: warning
      exp_annotations:
@@ -1027,7 +1051,7 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kubelet_server_expiration_renew_errors{job="kubelet",namespace="monitoring",node="minikube"}'
+  - series: 'kubelet_server_expiration_renew_errors{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}'
     values: '0+1x20'
   alert_rule_test:
   - eval_time: 16m
@@ -1036,6 +1060,7 @@ tests:
    - exp_labels:
        job: kubelet
        namespace: monitoring
+       cluster: kubernetes
        node: minikube
        severity: warning
      exp_annotations:
@@ -1045,13 +1070,14 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kube_job_failed{instance="instance1",condition="true",job="kube-state-metrics",job_name="job-1597623120",namespace="ns1"}'
+  - series: 'kube_job_failed{instance="instance1",condition="true",job="kube-state-metrics",job_name="job-1597623120",cluster="kubernetes",namespace="ns1"}'
     values: '1+0x20'
   alert_rule_test:
   - eval_time: 15m
     alertname: KubeJobFailed
     exp_alerts:
     - exp_labels:
+        cluster: "kubernetes"
         namespace: ns1
         job_name: job-1597623120
         severity: warning
@@ -1065,9 +1091,9 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kube_job_status_start_time{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
+  - series: 'kube_job_status_start_time{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
     values: '0+0x200 _x500 0+0x40'
-  - series: 'kube_job_status_active{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
+  - series: 'kube_job_status_active{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
     values: '1x200 _x500 1x40'
   alert_rule_test:
   - eval_time: 6h
@@ -1076,6 +1102,7 @@
     alertname: KubeJobNotCompleted
     exp_alerts:
     - exp_labels:
+        cluster: "kubernetes"
         namespace: ns1
         job_name: job1
         severity: warning
@@ -1086,9 +1113,9 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kube_job_status_start_time{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
+  - series: 'kube_job_status_start_time{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
     values: '0+0x740'
-  - series: 'kube_job_status_active{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
+  - series: 'kube_job_status_active{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}'
     values: '1+0x710 0x30'
   alert_rule_test:
   - eval_time: 6h
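Both KubeJobNotCompleted fixtures drive the same idea: a job counts as not completed while kube_job_status_active is nonzero and more than the configured timeout has passed since its start time; the second fixture flips active to 0 near the end so the alert clears. Roughly, with the concrete timeout left as the configurable placeholder it is upstream:

    # Sketch only; <timeout> is kubeJobTimeoutDuration in the mixin config.
    time() - max by (cluster, namespace, job_name) (
      kube_job_status_start_time{job="kube-state-metrics"}
      and
      kube_job_status_active{job="kube-state-metrics"} > 0
    ) > <timeout>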
@@ -1117,7 +1144,7 @@ tests:
 - interval: 1m
   input_series:
-  - series: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",namespace="test",pod="static-web",container="script",job="kube-state-metrics"}'
+  - series: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",cluster="kubernetes",namespace="test",pod="static-web",container="script",job="kube-state-metrics"}'
     values: '1 1 stale _x3 1 1 stale _x2 1+0x4 stale'
   alert_rule_test:
   - eval_time: 10m # alert hasn't fired
@@ -1129,6 +1156,7 @@ tests:
        severity: "warning"
        container: "script"
        job: "kube-state-metrics"
+       cluster: "kubernetes"
        namespace: "test"
        pod: "static-web"
        reason: "CrashLoopBackOff"
@@ -1137,12 +1165,13 @@ tests:
        runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping"
        summary: "Pod is crash looping."
   - eval_time: 20m
-    alertname: KubePodCrashLooping # alert fired for a period of 5 minutes after resolution because the alert looks back at the last 5 minutes of data and the range vector doesn't take stale samples into account
+    alertname: KubePodCrashLooping # alert fired for a period of 5 minutes after resolution because the alert looks back at the last 5 minutes of data and the range vector doesn't take stale samples into account
     exp_alerts:
     - exp_labels:
        severity: "warning"
        container: "script"
        job: "kube-state-metrics"
+       cluster: "kubernetes"
        namespace: "test"
        pod: "static-web"
        reason: "CrashLoopBackOff"
@@ -1156,15 +1185,15 @@ tests:
 # When ResourceQuota has both cpu and requests.cpu, min value of those will be taken into account for quota calculation.
 - interval: 1m
   input_series:
-  - series: 'kube_resourcequota{namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}'
     values: '1000x10'
-  - series: 'kube_resourcequota{namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
     values: '100x10'
-  - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
     values: '50x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="cpu", job="kube-state-metrics"}'
     values: '100x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="cpu", job="kube-state-metrics"}'
     values: '100x10'
   alert_rule_test:
   - eval_time: 4m
@@ -1173,15 +1202,15 @@
     alertname: KubeCPUQuotaOvercommit
 - interval: 1m
   input_series:
-  - series: 'kube_resourcequota{namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}'
     values: '1000x10'
-  - series: 'kube_resourcequota{namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
     values: '200x10'
-  - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}'
     values: '200x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="cpu", job="kube-state-metrics"}'
     values: '100x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="cpu", job="kube-state-metrics"}'
     values: '100x10'
   alert_rule_test:
   - eval_time: 4m
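The comment above these fixtures is realized with min without (resource): per namespace, the hard quota is min(cpu, requests.cpu), summed across namespaces and divided by the cluster's allocatable CPU. The first fixture gives (100 + 50) / 200 = 0.75, below the default overcommit factor, so no alert; the second gives (200 + 200) / 200 = 2, so KubeCPUQuotaOvercommit fires. A sketch, assuming the upstream expression and its default factor of 1.5:

    sum by (cluster) (
      min without (resource) (
        kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"}
      )
    )
    /
    sum by (cluster) (
      kube_node_status_allocatable{job="kube-state-metrics", resource="cpu"}
    ) > 1.5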
@@ -1199,15 +1228,15 @@ tests:
 # When ResourceQuota has both memory and requests.memory, min value of those will be taken into account for quota calculation.
 - interval: 1m
   input_series:
-  - series: 'kube_resourcequota{namespace="test", resource="memory", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="memory", type="hard", job="kube-state-metrics"}'
     values: '1000x10'
-  - series: 'kube_resourcequota{namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}'
     values: '100x10'
-  - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}'
     values: '50x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="memory", job="kube-state-metrics"}'
     values: '100x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="memory", job="kube-state-metrics"}'
     values: '100x10'
   alert_rule_test:
   - eval_time: 4m
@@ -1216,15 +1245,15 @@
     alertname: KubeMemoryQuotaOvercommit
 - interval: 1m
   input_series:
-  - series: 'kube_resourcequota{namespace="test", resource="memory", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="memory", type="hard", job="kube-state-metrics"}'
     values: '1000x10'
-  - series: 'kube_resourcequota{namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}'
     values: '500x10'
-  - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}'
+  - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}'
     values: '500x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="memory", job="kube-state-metrics"}'
     values: '10x10'
-  - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}'
+  - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="memory", job="kube-state-metrics"}'
     values: '10x10'
   alert_rule_test:
   - eval_time: 4m