From 61da7db1b65b308d1f51deea578bf0fffdec6a46 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Thu, 5 Dec 2024 13:56:52 +0300 Subject: [PATCH] dashboard: fix Prometheus TDG moving average Before this patch, Prometheus average panels (histogram or summary `metric_sum / metric_count`) were displaying the average over the whole instance lifetime. This info is rather useless: once the instance lifetime exceeds several dozen metric collection intervals (which happens often), the computed average becomes almost constant. Prometheus itself advises using a moving average for this [1]. InfluxDB panels already implement moving averages, so there's no need to update them. 1. https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations Closes #237 --- CHANGELOG.md | 6 ++++++ dashboard/panels/tdg/graphql.libsonnet | 2 +- dashboard/panels/tdg/tasks.libsonnet | 4 ++-- dashboard/panels/tdg/tuples.libsonnet | 2 +- tests/Prometheus/dashboard_tdg_compiled.json | 14 +++++++------- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 555854c..d1e88f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [Unreleased] + +### Fixed +- Prometheus TDG average panels displaying lifetime average instead of a moving one (#237) + + ## [3.2.0] - 2024-10-04 Grafana revisions: - Tarantool 3: diff --git a/dashboard/panels/tdg/graphql.libsonnet b/dashboard/panels/tdg/graphql.libsonnet index 7b020c4..c286911 100644 --- a/dashboard/panels/tdg/graphql.libsonnet +++ b/dashboard/panels/tdg/graphql.libsonnet @@ -37,7 +37,7 @@ local prometheus = grafana.prometheus; prometheus.target( expr=std.format( ||| - %(metrics_prefix)s%(metric_name_sum)s{%(filters)s} / %(metrics_prefix)s%(metric_name_count)s{%(filters)s} + rate(%(metrics_prefix)s%(metric_name_sum)s{%(filters)s}[$__rate_interval]) / rate(%(metrics_prefix)s%(metric_name_count)s{%(filters)s}[$__rate_interval]) |||, { metrics_prefix: cfg.metrics_prefix, diff --git a/dashboard/panels/tdg/tasks.libsonnet b/dashboard/panels/tdg/tasks.libsonnet index aaa7880..b54ccab 100644 --- a/dashboard/panels/tdg/tasks.libsonnet +++ b/dashboard/panels/tdg/tasks.libsonnet @@ -83,7 +83,7 @@ local prometheus = grafana.prometheus; prometheus.target( expr=std.format( ||| - %(metrics_prefix)s%(metric_name_sum)s{%(filters)s} / %(metrics_prefix)s%(metric_name_count)s{%(filters)s} + rate(%(metrics_prefix)s%(metric_name_sum)s{%(filters)s}[$__rate_interval]) / rate(%(metrics_prefix)s%(metric_name_count)s{%(filters)s}[$__rate_interval]) |||, { metrics_prefix: cfg.metrics_prefix, @@ -196,7 +196,7 @@ local prometheus = grafana.prometheus; prometheus.target( expr=std.format( ||| - %(metrics_prefix)s%(metric_name_sum)s{%(filters)s} / %(metrics_prefix)s%(metric_name_count)s{%(filters)s} + rate(%(metrics_prefix)s%(metric_name_sum)s{%(filters)s}[$__rate_interval]) / rate(%(metrics_prefix)s%(metric_name_count)s{%(filters)s}[$__rate_interval]) |||, { metrics_prefix: cfg.metrics_prefix, diff --git a/dashboard/panels/tdg/tuples.libsonnet b/dashboard/panels/tdg/tuples.libsonnet index 372da1c..6483753 100644 --- a/dashboard/panels/tdg/tuples.libsonnet +++ 
b/dashboard/panels/tdg/tuples.libsonnet @@ -18,7 +18,7 @@ local prometheus = grafana.prometheus; prometheus.target( expr=std.format( ||| - %(metrics_prefix)s%(metric_name_sum)s{%(filters)s} / %(metrics_prefix)s%(metric_name_count)s{%(filters)s} + rate(%(metrics_prefix)s%(metric_name_sum)s{%(filters)s}[$__rate_interval]) / rate(%(metrics_prefix)s%(metric_name_count)s{%(filters)s}[$__rate_interval]) |||, { metrics_prefix: cfg.metrics_prefix, diff --git a/tests/Prometheus/dashboard_tdg_compiled.json b/tests/Prometheus/dashboard_tdg_compiled.json index 5d179cd..19f6f6b 100644 --- a/tests/Prometheus/dashboard_tdg_compiled.json +++ b/tests/Prometheus/dashboard_tdg_compiled.json @@ -18917,7 +18917,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_scanned_tuples_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_scanned_tuples_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_scanned_tuples_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / rate(tdg_scanned_tuples_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type_name}} — {{alias}}", @@ -19004,7 +19004,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_returned_tuples_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_returned_tuples_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_returned_tuples_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / rate(tdg_returned_tuples_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type_name}} — {{alias}}", @@ -19932,7 +19932,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_graphql_query_time_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_graphql_query_time_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_graphql_query_time_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / rate(tdg_graphql_query_time_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", 
"format": "time_series", "intervalFactor": 2, "legendFormat": "{{operation_name}} ({{schema}}, {{entity}}) — {{alias}}", @@ -20193,7 +20193,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_graphql_mutation_time_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_graphql_mutation_time_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_graphql_mutation_time_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / rate(tdg_graphql_mutation_time_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{operation_name}} ({{schema}}, {{entity}}) — {{alias}}", @@ -23385,7 +23385,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_jobs_execution_time_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_jobs_execution_time_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_jobs_execution_time_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / rate(tdg_jobs_execution_time_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{name}} — {{alias}}", @@ -23907,7 +23907,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_tasks_execution_time_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_tasks_execution_time_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_tasks_execution_time_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / rate(tdg_tasks_execution_time_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{name}} ({{kind}}) — {{alias}}", @@ -24342,7 +24342,7 @@ "steppedLine": false, "targets": [ { - "expr": "tdg_system_tasks_execution_time_sum{alias=~\"$alias\",job=~\"$job\"} / tdg_system_tasks_execution_time_count{alias=~\"$alias\",job=~\"$job\"}\n", + "expr": "rate(tdg_system_tasks_execution_time_sum{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval]) / 
rate(tdg_system_tasks_execution_time_count{alias=~\"$alias\",job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{name}} ({{kind}}) — {{alias}}",