From e4fe0ede108bab72e97fc10002cb5e2659659137 Mon Sep 17 00:00:00 2001 From: Ryan Geyer Date: Wed, 24 Apr 2024 15:47:08 -0700 Subject: [PATCH] Overdue commit of migration for node/pod resources dashboard --- dashboards/resources/node.libsonnet | 373 ++++++++++++++++++---------- 1 file changed, 241 insertions(+), 132 deletions(-) diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index 55ac10b5d..54034420a 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -1,154 +1,263 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; -local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet'; -local template = grafana.template; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -{ - grafanaDashboards+:: { - local clusterTemplate = - template.new( - name='cluster', - datasource='$datasource', - query='label_values(up{%(kubeStateMetricsSelector)s}, %(clusterLabel)s)' % $._config, - current='', - hide=if $._config.showMultiCluster then '' else '2', - refresh=2, - includeAll=false, - sort=1 - ), +local fieldOverride = g.panel.timeSeries.fieldOverride; +local prometheus = g.query.prometheus; +local table = g.panel.table; +local timeSeries = g.panel.timeSeries; +local var = g.dashboard.variable; - local nodeTemplate = - template.new( - name='node', - datasource='$datasource', - query='label_values(kube_node_info{%(clusterLabel)s="$cluster"}, node)' % $._config, - current='', - hide='', - refresh=2, - includeAll=false, - multi=true, - sort=1 - ), +{ + local tsPanel = + timeSeries { + new(title): + timeSeries.new(title) + + timeSeries.options.legend.withShowLegend() + + timeSeries.options.legend.withAsTable() + + timeSeries.options.legend.withDisplayMode('table') + + timeSeries.options.legend.withPlacement('right') + + timeSeries.options.legend.withCalcs(['lastNotNull']) + + timeSeries.options.tooltip.withMode('single') + + timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + timeSeries.fieldConfig.defaults.custom.withSpanNulls(true) + + timeSeries.queryOptions.withInterval($._config.grafanaK8s.minimumTimeInterval), + }, + grafanaDashboards+:: { 'k8s-resources-node.json': - local tableStyles = { + local variables = { + datasource: + var.datasource.new('datasource', 'prometheus') + + var.datasource.withRegex($._config.datasourceFilterRegex) + + var.datasource.generalOptions.showOnDashboard.withLabelAndValue() + + var.datasource.generalOptions.withLabel('Data source') + + { + current: { + selected: true, + text: $._config.datasourceName, + value: $._config.datasourceName, + }, + }, + cluster: + var.query.new('cluster') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + $._config.clusterLabel, + 'up{%(kubeStateMetricsSelector)s}' % $._config + ) + + var.query.generalOptions.withLabel('cluster') + + var.query.refresh.onTime() + + ( + if $._config.showMultiCluster + then var.query.generalOptions.showOnDashboard.withLabelAndValue() + else var.query.generalOptions.showOnDashboard.withNothing() + ) + + var.query.withSort(type='alphabetical'), + node: + var.query.new('node') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'node', + 'kube_node_info{%(clusterLabel)s="$cluster"}' % $._config + ) + + var.query.generalOptions.withLabel('node') + + var.query.refresh.onTime() + + var.query.generalOptions.showOnDashboard.withLabelAndValue() + + var.query.selectionOptions.withMulti(true), + }; + + local links = { pod: { - alias: 'Pod', + title: 'Drill down to pods', + url: '%(prefix)s/d/%(uid)s/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}' % { + uid: $._config.grafanaDashboardIDs['k8s-resources-pod.json'], + prefix: $._config.grafanaK8s.linkPrefix, + }, }, }; - g.dashboard( - '%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']), - datasource_regex=$._config.datasourceFilterRegex, - datasource=$._config.datasourceName, - ) - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel([ + local panels = [ + tsPanel.new('CPU Usage') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', 'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"})' % $._config, + ) + + prometheus.withLegendFormat('max capacity'), + + prometheus.new( + '${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - ], [ - 'max capacity', - '{{pod}}', - ]) + - g.stack + + ) + + prometheus.withLegendFormat('{{pod}}'), + ]) + + tsPanel.fieldConfig.defaults.custom.withStacking({ mode: 'normal' }) + + tsPanel.standardOptions.withOverrides([ + fieldOverride.byName.new('max capacity') + + fieldOverride.byName.withPropertiesFromOptions( + timeSeries.standardOptions.color.withMode('fixed') + + timeSeries.standardOptions.color.withFixedColor('red') + ) + + fieldOverride.byName.withProperty('custom.stacking', { mode: 'none' }) + // This effectively "hides" max capacity from the panel. It shows only in the legend, showing the max capacity. In the "legacy" graph panel + // the max capacity could be reselected and thus shown on the timeseries with a dotted line. This is no longer possible. + + fieldOverride.byName.withProperty('custom.hideFrom', { tooltip: true, viz: true, legend: false }) + + fieldOverride.byName.withProperty('custom.lineStyle', { fill: 'dash', dash: [10, 10] }), + ]), + + table.new('CPU Quota') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + renameByName: { + pod: 'Pod', + 'Value #A': 'CPU Usage', + 'Value #B': 'CPU Requests', + 'Value #C': 'CPU Requests %', + 'Value #D': 'CPU Limits', + 'Value #E': 'CPU Limits %', + }, + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + }), + ]) + + table.standardOptions.withOverrides([ { - seriesOverrides: [ + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ { - alias: 'max capacity', - color: '#F2495C', - fill: 0, - hideTooltip: true, - legend: true, - linewidth: 2, - stack: false, - hiddenSeries: true, - dashes: true, + id: 'unit', + value: 'percentunit', }, ], }, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage (w/o cache)') + - // Like above, without page cache - g.queryPanel([ - 'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="memory"})' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node", container!=""}) by (pod)' % $._config, - ], [ - 'max capacity', - '{{pod}}', - ]) + - g.stack + - { yaxes: g.yaxes('bytes') } + { - seriesOverrides: [ + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ { - alias: 'max capacity', - color: '#F2495C', - fill: 0, - hideTooltip: true, - legend: true, - linewidth: 2, - stack: false, - hiddenSeries: true, - dashes: true, + id: 'links', + value: [links.pod], }, ], }, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, - 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, - }) - ) - ) + { - templating+: { - list+: [clusterTemplate, nodeTemplate], - }, - }, + ]), + + tsPanel.new('Memory Usage (w/o cache)') + + tsPanel.standardOptions.withUnit('bytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="memory"})' % $._config, + ) + + prometheus.withLegendFormat('max capacity'), + + prometheus.new( + '${datasource}', + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node", container!=""}) by (pod)' % $._config, + ) + + prometheus.withLegendFormat('{{pod}}'), + ]) + + tsPanel.fieldConfig.defaults.custom.withStacking({ mode: 'normal' }) + + tsPanel.standardOptions.withOverrides([ + fieldOverride.byName.new('max capacity') + + fieldOverride.byName.withPropertiesFromOptions( + timeSeries.standardOptions.color.withMode('fixed') + + timeSeries.standardOptions.color.withFixedColor('red') + ) + + fieldOverride.byName.withProperty('custom.stacking', { mode: 'none' }) + // This effectively "hides" max capacity from the panel. It shows only in the legend, showing the max capacity. In the "legacy" graph panel + // the max capacity could be reselected and thus shown on the timeseries with a dotted line. This is no longer possible. + + fieldOverride.byName.withProperty('custom.hideFrom', { tooltip: true, viz: true, legend: false }) + + fieldOverride.byName.withProperty('custom.lineStyle', { fill: 'dash', dash: [10, 10] }), + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-node.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster, variables.node]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=6)), + + + // local tableStyles = { + // pod: { + // alias: 'Pod', + // }, + // }; + + // g.dashboard( + // '%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s, + // uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']), + // datasource_regex=$._config.datasourceFilterRegex, + // datasource=$._config.datasourceName, + // ) + // .addRow( + // g.row('Memory Quota') + // .addPanel( + // g.panel('Memory Quota') + + // g.tablePanel([ + // 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, + // 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + // 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + // 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + // 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + // 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, + // 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, + // 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, + // ], tableStyles { + // 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, + // 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, + // 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, + // 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, + // 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, + // 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, + // 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, + // 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, + // }) + // ) + // ) + { + // templating+: { + // list+: [clusterTemplate, nodeTemplate], + // }, + // }, }, }