diff --git a/k8s/environments/sandbox/values/argocd.yaml b/k8s/environments/sandbox/values/argocd.yaml
index e19e00123..577701ae4 100644
--- a/k8s/environments/sandbox/values/argocd.yaml
+++ b/k8s/environments/sandbox/values/argocd.yaml
@@ -3,7 +3,43 @@ argo-cd:
     domain: argocd-sandbox.simple.org
   notifications:
     argocdUrl: "https://argocd-sandbox.simple.org"
+    # Metrics and a ServiceMonitor are enabled for each Argo CD component below.
+    metrics:
+      enabled: true
+      serviceMonitor:
+        enabled: true
+  controller:
+    metrics:
+      enabled: true
+      serviceMonitor:
+        enabled: true
+  dex:
+    metrics:
+      enabled: true
+      serviceMonitor:
+        enabled: true
+  repoServer:
+    metrics:
+      enabled: true
+      serviceMonitor:
+        enabled: true
+  applicationSet:
+    metrics:
+      enabled: true
+      service:
+        servicePort: 8080
+      serviceMonitor:
+        enabled: true
+  redis:
+    metrics:
+      enabled: true
+      serviceMonitor:
+        enabled: true
   server:
+    metrics:
+      enabled: true
+      serviceMonitor:
+        enabled: true
     ingress:
       enabled: true
       annotations:
diff --git a/k8s/manifests/kube-prometheus/config/systems-production.libsonnet b/k8s/manifests/kube-prometheus/config/systems-production.libsonnet
index 4e4ea67f9..47aa8ba77 100644
--- a/k8s/manifests/kube-prometheus/config/systems-production.libsonnet
+++ b/k8s/manifests/kube-prometheus/config/systems-production.libsonnet
@@ -165,6 +165,127 @@
       },
       module: 'http_2xx',
     },
+    // Sandbox endpoints, each probed with the http_2xx module.
+    {
+      name: 'dhis2-demo-ecuador-login',
+      targets: ['https://dhis2-demo-ecuador.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-c61c699a',
+      targets: ['https://dhis2-c61c699a.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-c61c699a-htn',
+      targets: ['https://dhis2-htn-dm-demo.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-ecuador-sandbox',
+      targets: ['https://dhis2-ecuador-sandbox.simple.org/'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-htn-tracking',
+      targets: ['https://dhis2-htn-tracking.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-phc',
+      targets: ['https://dhis2-phc.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-sandbox-01',
+      targets: ['https://dhis2-sandbox1.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'dhis2-sandbox-epidemics',
+      targets: ['https://dhis2-sandbox-epidemics.simple.org/dhis-web-commons/security/login.action'],
+      labels: {
+        service: 'dhis2',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'simple-v1-api',
+      targets: ['https://api-sandbox.simple.org/'],
+      labels: {
+        service: 'simple_server',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'simple-v1-dashboard',
+      targets: ['https://dashboard-sandbox.simple.org/'],
+      labels: {
+        service: 'simple_dashboard',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'simple-v1-metabase',
+      targets: ['https://metabase-sandbox.simple.org/'],
+      labels: {
+        service: 'metabase',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
+    {
+      name: 'simple-argocd-sandbox',
+      targets: ['https://argocd-sandbox.simple.org/'],
+      labels: {
+        service: 'argocd',
+        environment: 'sandbox',
+        country: 'null',
+      },
+      module: 'http_2xx',
+    },
   ],
   alertmanager: {
     externalUrl: 'http://alertmanager.simple.org',
diff --git a/k8s/manifests/kube-prometheus/lib/2xx-monitoring.libsonnet b/k8s/manifests/kube-prometheus/lib/2xx-monitoring.libsonnet
new file mode 100644
index 000000000..a3465f750
--- /dev/null
+++ b/k8s/manifests/kube-prometheus/lib/2xx-monitoring.libsonnet
@@ -0,0 +1,245 @@
+// Endpoint monitoring mixin: a Grafana dashboard over the probe_* metrics
+// and an alert that fires when a sandbox endpoint stops responding.
+local addMixin = (import 'kube-prometheus/lib/mixin.libsonnet');
+
+local prometheusRules = {
+  prometheusRules+:: {
+    groups: [
+      {
+        name: 'SandboxDownalerts.rules',
+        rules: [
+          {
+            alert: 'SandboxEnvironmentDown',
+            // probe_success == 0 catches an endpoint that fails its probe;
+            // up == 0 only catches a failed scrape, so both are checked.
+            expr: |||
+              probe_success{environment="sandbox"} == 0 or up{environment="sandbox"} == 0
+            |||,
+            for: '5m',
+            labels: {
+              severity: 'critical',
+            },
+            annotations: {
+              summary: 'Sandbox environment is down',
+              description: 'The sandbox environment instance {{ $labels.instance }} has been down for more than 5 minutes.',
+            }
+          }
+        ],
+      },
+    ],
+  },
+};
+local grafanaDashboards = {
+  grafanaDashboards: {
+    '2xx-monitoring.json': {
+      "annotations": {
+        "list": [
+          {
+            "builtIn": 1,
+            "datasource": {
+              "type": "grafana",
+              "uid": "-- Grafana --"
+            },
+            "enable": true,
+            "hide": true,
+            "iconColor": "rgba(0, 211, 255, 1)",
+            "name": "Annotations & Alerts",
+            "type": "dashboard"
+          }
+        ]
+      },
+      "editable": true,
+      "fiscalYearStartMonth": 0,
+      "graphTooltip": 0,
+      "id": null,
+      "links": [],
+      "panels": [
+        {
+          "collapsed": false,
+          "gridPos": {
+            "h": 1,
+            "w": 24,
+            "x": 0,
+            "y": 0
+          },
+          "id": 1,
+          "panels": [],
+          "title": "Monitoring API Response",
+          "type": "row"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "mode": "none"
+                }
+              },
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green",
+                    "value": null
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 1
+          },
+          "id": 2,
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "sum by (country, environment, instance) (probe_http_status_code == 200)",
+              "refId": "A"
+            }
+          ],
+          "title": "HTTP 2xx Monitoring",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "mode": "none"
+                }
+              },
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green",
+                    "value": null
+                  },
+                  {
+                    "color": "red",
+                    "value": 1
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 1
+          },
+          "id": 3,
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "avg by (country, environment, instance) (probe_duration_seconds)",
+              "refId": "B"
+            }
+          ],
+          "title": "HTTP Response Time (seconds)",
+          "type": "timeseries"
+        }
+      ],
+      "schemaVersion": 39,
+      "tags": [],
+      "templating": {
+        "list": [
+          {
+            "current": {
+              "selected": true,
+              "text": "systems-production",
+              "value": "P4D7C5C41A3558C30"
+            },
+            "hide": 0,
+            "name": "datasource",
+            "query": "prometheus",
+            "refresh": 1,
+            "type": "datasource"
+          }
+        ]
+      },
+      "time": {
+        "from": "now-6h",
+        "to": "now"
+      },
+      "timezone": "browser",
+      "title": "2xx Monitoring (Sandbox + Production)",
+      "uid": "monitoring-2xx",
+      "version": 1,
+      "weekStart": ""
+    }
+  }
+};
+
+local mixin = addMixin({
+  name: '2xx-monitoring',
+  dashboardFolder: 'Endpoints Monitoring',
+  mixin: prometheusRules + grafanaDashboards,
+});
+
+{
+  grafanaDashboards: mixin.grafanaDashboards,
+  prometheusRules: mixin.prometheusRules,
+}
diff --git a/k8s/manifests/kube-prometheus/monitoring.jsonnet b/k8s/manifests/kube-prometheus/monitoring.jsonnet
index 6efb765eb..d45916949 100644
--- a/k8s/manifests/kube-prometheus/monitoring.jsonnet
+++ b/k8s/manifests/kube-prometheus/monitoring.jsonnet
@@ -12,6 +12,7 @@ local loki = (import 'lib/loki.libsonnet');
 local sendgrid = (import 'lib/sendgrid.libsonnet');
 local sslCertificateStatus = (import 'lib/ssl-certificate-status.libsonnet');
 local blackboxProbes = (import 'lib/blackbox-probe.libsonnet');
+local http2xxMonitoring = (import 'lib/2xx-monitoring.libsonnet');
 
 local environment = std.extVar('ENVIRONMENT');
 local namespace = 'monitoring';
@@ -43,6 +44,7 @@ local grafanaDashboards =
   ingressNginx.grafanaDashboards +
   simpleServer.grafanaDashboards +
   loki.grafanaDashboards +
+  http2xxMonitoring.grafanaDashboards +
   (if enableDhis2Dashboards then dhis2Server.grafanaDashboards else {});
 
 local blackboxProbeMonitors =