-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable postgres-exporter by default from the timescaledb-single Helm
chart. - Add in postgres-exporter mixin alerts and Grafana dashboard - Fix and merge how we build and generate mixin dashboards and alerts
- Loading branch information
Showing
9 changed files
with
1,764 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
groups: | ||
- name: PostgreSQL | ||
rules: | ||
- alert: PostgreSQLMaxConnectionsReached | ||
annotations: | ||
description: '{{ $labels.instance }} is exceeding the currently configured maximum | ||
Postgres connection limit (current value: {{ $value }}s). Services may be | ||
degraded - please take immediate action (you probably need to increase max_connections | ||
in the Docker image and re-deploy.' | ||
summary: '{{ $labels.instance }} has maxed out Postgres connections.' | ||
expr: | | ||
sum by (instance) (pg_stat_activity_count{}) | ||
>= | ||
sum by (instance) (pg_settings_max_connections{}) | ||
- | ||
sum by (instance) (pg_settings_superuser_reserved_connections{}) | ||
for: 1m | ||
labels: | ||
severity: warning | ||
- alert: PostgreSQLHighConnections | ||
annotations: | ||
description: '{{ $labels.instance }} is exceeding 80% of the currently configured | ||
maximum Postgres connection limit (current value: {{ $value }}s). Please check | ||
utilization graphs and confirm if this is normal service growth, abuse or | ||
an otherwise temporary condition or if new resources need to be provisioned | ||
(or the limits increased, which is mostly likely).' | ||
summary: '{{ $labels.instance }} is over 80% of max Postgres connections.' | ||
expr: | | ||
sum by (instance) (pg_stat_activity_count{}) | ||
> | ||
( | ||
sum by (instance) (pg_settings_max_connections{}) | ||
- | ||
sum by (instance) (pg_settings_superuser_reserved_connections{}) | ||
) * 0.8 | ||
for: 10m | ||
labels: | ||
severity: warning | ||
- alert: PostgreSQLDown | ||
annotations: | ||
description: '{{ $labels.instance }} is rejecting query requests from the exporter, | ||
and thus probably not allowing DNS requests to work either. User services | ||
should not be effected provided at least 1 node is still alive.' | ||
summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}' | ||
expr: pg_up{} != 1 | ||
for: 1m | ||
labels: | ||
severity: warning | ||
- alert: PostgreSQLSlowQueries | ||
annotations: | ||
description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for | ||
database {{ $labels.datname }} with a value of {{ $value }} ' | ||
summary: 'PostgreSQL high number of slow on {{ $labels.cluster }} for database | ||
{{ $labels.datname }} ' | ||
expr: | | ||
avg by (datname) ( | ||
rate ( | ||
pg_stat_activity_max_tx_duration{datname!~"template.*",}[2m] | ||
) | ||
) > 2 * 60 | ||
for: 2m | ||
labels: | ||
severity: warning | ||
- alert: PostgreSQLQPS | ||
annotations: | ||
description: PostgreSQL high number of queries per second on {{ $labels.cluster | ||
}} for database {{ $labels.datname }} with a value of {{ $value }} | ||
summary: PostgreSQL high number of queries per second {{ $labels.cluster }} | ||
for database {{ $labels.datname }} | ||
expr: | | ||
avg by (datname) ( | ||
irate( | ||
pg_stat_database_xact_commit{datname!~"template.*",}[5m] | ||
) | ||
+ | ||
irate( | ||
pg_stat_database_xact_rollback{datname!~"template.*",}[5m] | ||
) | ||
) > 10000 | ||
for: 5m | ||
labels: | ||
severity: warning | ||
- alert: PostgreSQLCacheHitRatio | ||
annotations: | ||
description: PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database | ||
{{ $labels.datname }} with a value of {{ $value }} | ||
summary: PostgreSQL low cache hit rate on {{ $labels.cluster }} for database | ||
{{ $labels.datname }} | ||
expr: | | ||
avg by (datname) ( | ||
rate(pg_stat_database_blks_hit{datname!~"template.*",}[5m]) | ||
/ | ||
( | ||
rate( | ||
pg_stat_database_blks_hit{datname!~"template.*",}[5m] | ||
) | ||
+ | ||
rate( | ||
pg_stat_database_blks_read{datname!~"template.*",}[5m] | ||
) | ||
) | ||
) < 0.98 | ||
for: 5m | ||
labels: | ||
severity: warning |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.