Skip to content

Commit

Permalink
Add Node Problem Detector dashboard. (#401)
Browse files Browse the repository at this point in the history
  • Loading branch information
whites11 authored Nov 9, 2023
1 parent 0ccd67d commit e845aa3
Show file tree
Hide file tree
Showing 2 changed files with 304 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Add Nginx admission controller dashboard.
- Add Node Problem Detector dashboard.

## [3.2.4] - 2023-11-06

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 116,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"id": 5,
"options": {
"code": {
"language": "plaintext",
"showLineNumbers": false,
"showMiniMap": false
},
"content": "# How to feed data into this dashboard\n\nThis dashboard displays data providede by the `node-problem-detector` managed app.\nPlease install it from the playground catalog to get it going.\n\nWARNING: installing the app on vintage clusters >= 19.3.0 will enable automatic node termination when `node-problem-detector` finds problems. While we tested this, it is still considered an experimental feature.",
"mode": "markdown"
},
"pluginVersion": "10.1.5",
"type": "text"
},
{
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 4
},
"id": 3,
"panels": [],
"repeat": "cluster_id",
"repeatDirection": "h",
"title": "Cluster $cluster_id",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 5
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "sum(problem_gauge{cluster_id=\"$cluster_id\"}) by (type)",
"instant": false,
"legendFormat": "{{reason}}",
"range": true,
"refId": "A"
}
],
"title": "Node problems detected on $cluster_id",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 17
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "10.1.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(problem_gauge{cluster_id=\"$cluster_id\"}) by (node,type) > 0",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Current Problems",
"type": "table"
}
],
"refresh": "5s",
"schemaVersion": 38,
"style": "dark",
"tags": [
"owner:team-phoenix"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "vr7i8",
"value": "vr7i8"
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"definition": "label_values(problem_counter,cluster_id)",
"hide": 0,
"includeAll": false,
"label": "Cluster",
"multi": true,
"name": "cluster_id",
"options": [],
"query": {
"query": "label_values(problem_counter,cluster_id)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Node Problem Detector",
"uid": "f5fc3e61-0b36-40ba-bcc8-f0d9ecbe2900",
"version": 1,
"weekStart": ""
}

0 comments on commit e845aa3

Please sign in to comment.