From 9435ba217d64fc1eebcdef908256e7ece13f66f0 Mon Sep 17 00:00:00 2001 From: taylanisikdemir Date: Tue, 10 Dec 2024 16:35:11 -0800 Subject: [PATCH] Integrate matching simulation metrics with grafana (#6553) --- ...cker-compose-local-matching-simulation.yml | 36 +- docker/buildkite/grafana/grafana.ini | 3 + .../dashboards/cadence-client.json | 2409 ++++++++ .../dashboards/cadence-matching.json | 1424 +++++ .../dashboards/cadence-persistence.json | 597 ++ .../dashboards/cadence-server.json | 5365 +++++++++++++++++ .../provisioning/dashboards/default.yaml | 9 + .../provisioning/datasources/default.yaml | 6 + docker/buildkite/prometheus/.gitignore | 1 + .../matching_simulation_prometheus.yml | 14 + host/matching_simulation_test.go | 2 + host/onebox.go | 35 +- scripts/run_matching_simulator.sh | 6 +- 13 files changed, 9903 insertions(+), 4 deletions(-) create mode 100644 docker/buildkite/grafana/grafana.ini create mode 100644 docker/buildkite/grafana/provisioning/dashboards/cadence-client.json create mode 100644 docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json create mode 100644 docker/buildkite/grafana/provisioning/dashboards/cadence-persistence.json create mode 100644 docker/buildkite/grafana/provisioning/dashboards/cadence-server.json create mode 100644 docker/buildkite/grafana/provisioning/dashboards/default.yaml create mode 100644 docker/buildkite/grafana/provisioning/datasources/default.yaml create mode 100644 docker/buildkite/prometheus/.gitignore create mode 100644 docker/buildkite/prometheus/matching_simulation_prometheus.yml diff --git a/docker/buildkite/docker-compose-local-matching-simulation.yml b/docker/buildkite/docker-compose-local-matching-simulation.yml index 89a6eb8b6a9..41861fda40c 100644 --- a/docker/buildkite/docker-compose-local-matching-simulation.yml +++ b/docker/buildkite/docker-compose-local-matching-simulation.yml @@ -17,6 +17,31 @@ services: interval: 15s timeout: 30s retries: 10 + prometheus: + image: prom/prometheus:v3.0.1 + volumes: + - ./prometheus:/etc/prometheus + command: + - '--config.file=/etc/prometheus/matching_simulation_prometheus.yml' + ports: + - '9090:9090' + networks: + services-network: + aliases: + - prometheus + grafana: + image: grafana/grafana:11.4.0 + volumes: + - ./grafana:/etc/grafana + user: "1000" + depends_on: + - prometheus + ports: + - '3000:3000' + networks: + services-network: + aliases: + - grafana matching-simulator: build: @@ -40,8 +65,17 @@ services: - "CASSANDRA=1" - "CASSANDRA_SEEDS=cassandra" depends_on: + prometheus: + condition: service_started + grafana: + condition: service_started cassandra: condition: service_healthy + ports: # expose prometheus ports so they can be scraped + - '8306:8306' + - '8307:8307' + - '8308:8308' + - '8309:8309' volumes: - ../../:/cadence - /cadence/.build/ # ensure we don't mount the build directory @@ -49,7 +83,7 @@ services: networks: services-network: aliases: - - integration-test + - cadence networks: services-network: diff --git a/docker/buildkite/grafana/grafana.ini b/docker/buildkite/grafana/grafana.ini new file mode 100644 index 00000000000..d087b643896 --- /dev/null +++ b/docker/buildkite/grafana/grafana.ini @@ -0,0 +1,3 @@ +[auth.anonymous] +enabled = true +org_role = Admin \ No newline at end of file diff --git a/docker/buildkite/grafana/provisioning/dashboards/cadence-client.json b/docker/buildkite/grafana/provisioning/dashboards/cadence-client.json new file mode 100644 index 00000000000..8cffac640fd --- /dev/null +++ b/docker/buildkite/grafana/provisioning/dashboards/cadence-client.json @@ -0,0 +1,2409 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "id": 5211598, + "iteration": 1638064520253, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 46, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(cadence_workflow_start_total{namespace=\"$namespace\",domain=\"$domain\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Start", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(cadence_workflow_completed_total{namespace=\"$namespace\",domain=\"$domain\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Completed", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(cadence_workflow_canceled_total{namespace=\"$namespace\",domain=\"$domain\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Canceled", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(cadence_workflow_continue_as_new_total_total{namespace=\"$namespace\",domain=\"$domain\"}[2m]))", + "interval": "", + "legendFormat": "Continued As New", + "refId": "F" + }, + { + "exemplar": true, + "expr": "sum(rate(cadence_workflow_signal_with_start_total{namespace=\"$namespace\",domain=\"$domain\"}[2m]))", + "interval": "", + "legendFormat": "Signal With Start", + "refId": "G" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Workflow Start/Success", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "TODO: how to add rate()? https://stackoverflow.com/questions/66286901/how-to-apply-rate-to-expression-with-name", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_workflow_failed{namespace=\"$namespace\"})) by (domain,workflowtype)", + "format": "time_series", + "hide": false, + "interval": "", + "legendFormat": "FailedOnClient", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(count(workflow_failed{cadence_service=\"cadence_history\",namespace=\"$namespace\"})) by (domain,workflowtype)", + "hide": false, + "interval": "", + "legendFormat": "FailedOnServer", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(count(workflow_terminate{cadence_service=\"cadence_history\",namespace=\"$namespace\"})) by (domain,workflowtype)", + "hide": false, + "interval": "", + "legendFormat": "Terminate", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(count(workflow_timeout{cadence_service=\"cadence_history\",namespace=\"$namespace\"})) by (domain,workflowtype)", + "hide": false, + "interval": "", + "legendFormat": "Timeout", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Workflow Failure", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_decision_poll_total{namespace=\"$namespace\", Domain=\"$domain\"})) by (domain,tasklist)", + "hide": false, + "interval": "", + "legendFormat": "Total", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_decision_poll_failed{namespace=\"$namespace\", Domain=\"$domain\"})) by (domain,tasklist)", + "hide": false, + "interval": "", + "legendFormat": "Failed", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_decision_poll_no_task{namespace=\"$namespace\", Domain=\"$domain\"})) by (domain,tasklist)", + "interval": "", + "legendFormat": "No Task", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_decision_poll_succeed{namespace=\"$namespace\", Domain=\"$domain\"})) by (domain,tasklist)", + "interval": "", + "legendFormat": "Succeed", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Decision Poll Counters", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(cadence_requests_per_tl{namespace=\"$namespace\",operation=\"AddDecisionTask\"}[2m])) by(tasklist,domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Decisions Started Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(cadence_decision_scheduled_to_start_latency_seconds_count) by (Domain, TaskList)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "max(cadence_decision_scheduled_to_start_latency_seconds_max) by (Domain, TaskList)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(cadence_decision_scheduled_to_start_latency_seconds_sum{Domain=\"$domain\",namespace=\"$namespace\"} [5m])) by (le,Domain,TaskList) )", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Decision Scheduled To Start Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "hiddenSeries": false, + "id": 93, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(cadence_worker_panic{namespace=\"$namespace\"}[2m])) by(namespace,domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(cadence_non_deterministic_error{namespace=\"$namespace\"}[2m])) by(namespace,domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "NonDeterministicError and Worker Panic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "hiddenSeries": false, + "id": 94, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_sticky_cache_hit{namespace=\"$namespace\",domain=\"$domain\"})) by (Domain, TaskList)/\n(sum(count(cadence_sticky_cache_hit{namespace=\"$namespace\",domain=\"$domain\"})) by (Domain, TaskList)+\nsum(count(cadence_sticky_cache_miss{namespace=\"$namespace\",domain=\"$domain\"})) by (Domain, TaskList))*100", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Workflow Sticky Cache Hit Rate and Miss count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "hiddenSeries": false, + "id": 95, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_sticky_cache_total_forced_eviction{namespace=\"$namespace\",domain=\"$domain\"})) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "RegularEviction", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_sticky_cache_thread_forced_eviction{namespace=\"$namespace\",domain=\"$domain\"})) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "ForcedEvictionCacheFull", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Workflow Sticky Cache Eviction", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 96, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(avg(cadence_workflow_endtoend_latency_bucket)) by (le,namespace,Domain,TaskList,workflowType))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(avg(cadence_workflow_endtoend_latency_bucket)) by (le,namespace,Domain,TaskList,workflowType))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Workflow End to End Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Workflow", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 44, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_activity_task_failed{namespace=\"$namespace\",domain=\"$domain\"})) by (activitytype)", + "hide": false, + "interval": "", + "legendFormat": "Failed", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_activity_task_completed{namespace=\"$namespace\",domain=\"$domain\"})) by (activitytype)", + "interval": "", + "legendFormat": "Completed", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(cadence_activity_task_timeouted{namespace=\"$namespace\",domain=\"$domain\"}[2m])) by (activitytype)", + "hide": false, + "interval": "", + "legendFormat": "Timeouted", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Activity Task Operations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(cadence_activity_execution_latency) by (namespace,domain,tasklist,activitytype)", + "hide": false, + "interval": "", + "legendFormat": "avg latency", + "refId": "A" + }, + { + "exemplar": true, + "expr": "max(cadence_activity_execution_latency) by (namespace,domain,tasklist,activitytype)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Activity Execution Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 97, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_activity_poll_total{namespace=\"$namespace\",domain=\"$domain\"})) by (activitytype)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_activity_poll_failed{namespace=\"$namespace\",domain=\"$domain\"})) by (activitytype)", + "interval": "", + "legendFormat": "Completed", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(cadence_activity_poll_succeed{namespace=\"$namespace\",domain=\"$domain\"}) by (activitytype)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(cadence_activity_poll_no_task{namespace=\"$namespace\",domain=\"$domain\"}) by (activitytype)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Activity Poll Counters", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "hiddenSeries": false, + "id": 98, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(cadence_requests_per_tl{cadence_service=\"cadence_matching\",namespace=\"$namespace\"}[5m])) by (domain,tasklist)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ActivityTasks scheduled per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 99, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(cadence_activity_scheduled_to_start_latency_count{Domain=\"$domain\",namespace=\"$namespace\"}) by (Domain, TaskList,ActivityType)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "max(cadence_activity_scheduled_to_start_latency_max{Domain=\"$domain\",namespace=\"$namespace\"}) by (Domain,TaskList,ActivityType)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, max(cadence_activity_scheduled_to_start_latency_sum{Domain=\"$domain\",namespace=\"$namespace\"} ) by (leDomain, TaskList,ActivityType) )", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Activity Scheduled To Start Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 100, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(count(cadence_activity_execution_failed{namespace=\"$namespace\",domain=\"$domain\"}))by(domain,activitytype)\n", + "hide": false, + "interval": "", + "legendFormat": "ExecFailedOnClient", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_activity_task_panic{namespace=\"$namespace\",domain=\"$domain\"}))by(domain)", + "hide": false, + "interval": "", + "legendFormat": "TaskPanicOnClient", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_activity_task_failed{namespace=\"$namespace\",domain=\"$domain\"}))by(domain,activitytype)", + "hide": false, + "interval": "", + "legendFormat": "TaskFailedOnClient", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(count(cadence_activity_task_canceled{namespace=\"$namespace\",domain=\"$domain\"}))by(domain)", + "hide": false, + "interval": "", + "legendFormat": "TaskCanceledOnClient", + "refId": "D" + }, + { + "exemplar": true, + "expr": "sum(count(heartbeat_timeout{cadence_service=\"cadence_history\",namespace=\"$namespace\",domain=\"$domain\"}))by(domain)", + "hide": false, + "interval": "", + "legendFormat": "HeartbeatTimeout", + "refId": "E" + }, + { + "exemplar": true, + "expr": "sum(count(schedule_to_start_timeout{cadence_service=\"cadence_history\",namespace=\"$namespace\",domain=\"$domain\"}))by(domain)", + "hide": false, + "interval": "", + "legendFormat": "ScheduledToStartTimeout", + "refId": "F" + }, + { + "exemplar": true, + "expr": "sum(count(start_to_close_timeout{cadence_service=\"cadence_history\",namespace=\"$namespace\",domain=\"$domain\"}))by(domain)", + "hide": false, + "interval": "", + "legendFormat": "StartToCloseTimeout", + "refId": "G" + }, + { + "exemplar": true, + "expr": "sum(count(schedule_to_close_timeout{cadence_service=\"cadence_history\",namespace=\"$namespace\",domain=\"$domain\"}))by(domain)", + "hide": false, + "interval": "", + "legendFormat": "ScheduledToCloseTimeout", + "refId": "H" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Activity Failure", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Activity", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 102, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 104, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "(\n1 - count(cadence_error{namespace=\"$namespace\",domain=\"$domain\"})by(domain)/\ncount(cadence_request{namespace=\"$namespace\",domain=\"$domain\"})by(domain)\n) * 100\n\n\n", + "hide": false, + "interval": "", + "legendFormat": "ExecFailedOnClient", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service API success rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 105, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(avg_over_time(cadence_latency{Domain=\"$domain\",namespace=\"$namespace\"} [1m])) by (Domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(cadence_latency{Domain=\"$domain\",namespace=\"$namespace\"} [5m])) by (le,domain) )", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.5, sum(rate(cadence_latency{Domain=\"$domain\",namespace=\"$namespace\"} [5m])) by (le,domain) )", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service API Latency (TODO exclude long-poll APIs)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Service", + "type": "row" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 107, + "panels": [], + "title": "History", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 108, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile($quantiles, (max by (le, domain) (history_size_bucket{namespace=\"$namespace\"})))*1000000000", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max History Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "hiddenSeries": false, + "id": 109, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (domain) (history_count_sum{namespace=\"$namespace\",domain!=\"all\"}/history_count_count{namespace=\"$namespace\",domain!=\"all\"})\n*1000000000", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Avg History Length ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 110, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "max(histogram_quantile(1, max(event_blob_size_bucket) by (le)))*1000000000", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max Event Blob Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "", + "schemaVersion": 31, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "cadence-master", + "value": "cadence-master" + }, + "datasource": "Prometheus", + "definition": "label_values({__name__=~ \"cadence_requests|cadence_decision_scheduled_to_start_latency_seconds_sum\"}, namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values({__name__=~ \"cadence_requests|cadence_decision_scheduled_to_start_latency_seconds_sum\"}, namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "all", + "value": "all" + }, + "datasource": "Prometheus", + "definition": "label_values(activity_end_to_end_latency_sum{namespace=\"$namespace\"}, domain)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Domain", + "multi": true, + "name": "domain", + "options": [], + "query": { + "query": "label_values(activity_end_to_end_latency_sum{namespace=\"$namespace\"}, domain)", + "refId": "Prometheus-domain-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "0.99", + "value": "0.99" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Quantiles", + "multi": false, + "name": "quantiles", + "options": [ + { + "selected": false, + "text": "0.5", + "value": "0.5" + }, + { + "selected": false, + "text": "0.95", + "value": "0.95" + }, + { + "selected": true, + "text": "0.99", + "value": "0.99" + } + ], + "query": "0.5, 0.95, 0.99", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Cadence-Client-Basic", + "uid": "1B0efRyGy2", + "version": 2 +} \ No newline at end of file diff --git a/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json b/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json new file mode 100644 index 00000000000..bc76ae1c601 --- /dev/null +++ b/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json @@ -0,0 +1,1424 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 6, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "Add & Poll QPS and Latencies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (tasklist) (\n rate(cadence_requests_per_tl{\n operation=\"AddDecisionTask\"\n }[1m])\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AddDecisionTask QPS per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (tasklist) (\n rate(cadence_requests_per_tl{\n operation=\"PollForDecisionTask\"\n }[1m])\n)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PollForDecisionTask QPS per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(cadence_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AddDecisionTask p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(cadence_latency_per_tl_bucket{operation=\"PollForDecisionTask\"}[1m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PollForDecisionTask p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(cadence_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AddDecisionTask p50 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(cadence_latency_per_tl_bucket{operation=\"PollForDecisionTask\"}[1m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PollForDecisionTask p50 latency per tasklist", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 10, + "panels": [], + "title": "Sync vs Async Matches", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist)(rate(syncmatch_latency_per_tl_count{operation=\"AddDecisionTask\"}[1m])) / sum by (tasklist) (rate(cadence_requests_per_tl{operation=\"AddDecisionTask\"}[1m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Sync Match Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist) (\n rate(syncmatch_latency_per_tl_sum{operation=\"AddDecisionTask\"}[1m])\n )", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Sync Matches", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist) (\n rate(asyncmatch_latency_per_tl_sum{tasklistType=\"decision\"}[1m])\n )", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Async Matches", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(syncmatch_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Sync Match p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(asyncmatch_latency_per_tl_bucket{tasklistType=\"decision\"}[1m]))\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Async Match p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(syncmatch_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Sync Match p50 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(asyncmatch_latency_per_tl_bucket{tasklistType=\"decision\"}[1m]))\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Async Match p50 latency per tasklist", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 58 + }, + "id": 15, + "panels": [], + "title": "Backlog", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist) (rate(task_backlog_per_tl{tasklistType=\"decision\"}[1m]))\n", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Backlog per tasklist", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "2024-12-10T17:20:49.632Z", + "to": "2024-12-10T17:26:01.274Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Cadence Matching", + "uid": "de6iyddpu6nswd", + "version": 2, + "weekStart": "" + } diff --git a/docker/buildkite/grafana/provisioning/dashboards/cadence-persistence.json b/docker/buildkite/grafana/provisioning/dashboards/cadence-persistence.json new file mode 100644 index 00000000000..c9e167a4d84 --- /dev/null +++ b/docker/buildkite/grafana/provisioning/dashboards/cadence-persistence.json @@ -0,0 +1,597 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "mean", + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(operation) (rate(persistence_requests{operation=~\"$operation\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Requests Per Operation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(operation, shard_id) (rate(persistence_requests_per_shard{operation=~\"$operation\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{operation}} - {{shard_id}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Requests Per Operation Per Shard", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg by(operation) (rate(persistence_latency_sum{operation=~\"$operation\"}[$__rate_interval]) / rate(persistence_latency_count{operation=~\"$operation\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Latencies Per Operation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg by(operation, shard_id) (rate(persistence_latency_per_shard_sum{operation=~\"$operation\"}[$__rate_interval]) / rate(persistence_latency_per_shard_count{operation=~\"$operation\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{operation}} - {{shard_id}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Latencies Per Operation Per Shard", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "100 * (1- (rate(persistence_errors{operation=~\"$operation\"}[$__rate_interval]) / rate(persistence_requests{operation=~\"$operation\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Success Rate Per Operation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 6, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# TODO: Persistence errors per shard metric is not defined. Once it's added update this panel", + "mode": "markdown" + }, + "pluginVersion": "10.2.2", + "title": "Success Rate Per Operation Per Shard", + "type": "text" + } + ], + "refresh": "", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(persistence_requests,operation)", + "hide": 0, + "includeAll": true, + "label": "operation", + "multi": false, + "name": "operation", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(persistence_requests,operation)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Cadence-Persistence", + "uid": "b00f7dd5-3dd6-4cf9-b17b-425e071e2b5b", + "version": 17, + "weekStart": "" + } diff --git a/docker/buildkite/grafana/provisioning/dashboards/cadence-server.json b/docker/buildkite/grafana/provisioning/dashboards/cadence-server.json new file mode 100644 index 00000000000..3b03ff9e25f --- /dev/null +++ b/docker/buildkite/grafana/provisioning/dashboards/cadence-server.json @@ -0,0 +1,5365 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": null, + "graphTooltip": 0, + "id": 5210895, + "iteration": 1638067194224, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 46, + "panels": [], + "title": "Overall", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cadence_errors{cadence_service=\"cadence_frontend\", namespace=\"$namespace\"})", + "interval": "", + "legendFormat": "\"Errors\"", + "refId": "A" + }, + { + "expr": "sum(cadence_requests{cadence_service=\"cadence_frontend\", namespace=\"$namespace\"})", + "interval": "", + "legendFormat": "\"Requests\"", + "refId": "B" + }, + { + "expr": "1-(sum(cadence_errors{cadence_service=\"cadence_frontend\", namespace=\"$namespace\"}))/(sum(cadence_requests{cadence_service=\"cadence_frontend\", namespace=\"$namespace\"}))*100", + "interval": "", + "legendFormat": "\"Server availability\"", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service Availability(server metrics)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "TODO: how to add rate()? https://stackoverflow.com/questions/66286901/how-to-apply-rate-to-expression-with-name", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{namespace=\"$namespace\",operation=\"PollForActivityTask\"}[2m]))", + "format": "time_series", + "hide": false, + "interval": "", + "legendFormat": "Activities Started per Second:", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Activities Started Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\",operation=\"StartWorkflowExecution\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "StartWorkflowExecution", + "refId": "A" + }, + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\",operation=\"SignalWithStartWorkflowExecution\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "SignalWithStartWorkflowExecution:", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "StartWorkflow Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{namespace=\"$namespace\",operation=\"PollForDecisionTask\"}[2m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Decisions Started Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(workflow_success{cadence_service=\"cadence_history\",namespace=\"$namespace\",workflowType=\"workflow_sanity\"}) by (workflowType)", + "interval": "", + "legendFormat": "{{workflowType}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Periodical Test Suite Success(aka Canary)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 44, + "panels": [], + "title": "Frontend", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "(1-sum(cadence_errors{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"})/sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}))*100", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service API success rate by Client(includes client side error like EntityNotExists)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(1-sum(cadence_errors{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"})/sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}))*100", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service Availability(server metrics)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m]))", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "All API per second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "hiddenSeries": false, + "id": 49, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m])) by (operation)", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "API per second(breakdown per operation)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 50, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_bucket{namespace=\"$namespace\",operation!~\"CountWorkflowExecutions|GetWorkflowExecutionHistory|ListClosedWorkflowExecutions|ListOpenWorkflowExecutions|ListWorkflowExecutions|PollForActivityTask|PollForDecisionTask|QueryWorkflow\"}[2m]))))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Regular API Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "hiddenSeries": false, + "id": 51, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_bucket{namespace=\"$namespace\",operation=~\"CountWorkflowExecutions|GetWorkflowExecutionHistory|ListClosedWorkflowExecutions|ListOpenWorkflowExecutions|ListWorkflowExecutions|PollForActivityTask|PollForDecisionTask|QueryWorkflow\"}[2m]))))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ListWorkflow API Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "hiddenSeries": false, + "id": 52, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_bucket{namespace=\"$namespace\",operation=\"PollForDecisionTask\"}[2m]))))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_bucket{namespace=\"$namespace\",operation=\"PollForActivityTask\"}[2m]))))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Long Poll API Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "hiddenSeries": false, + "id": 53, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_bucket{namespace=\"$namespace\", cadence_service=\"cadence_frontend\", operation=~\"GetWorkflowExecutionHistory|QueryWorkflow\"}[2m]))))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GetHistory/QueryWorkflow API Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 58 + }, + "hiddenSeries": false, + "id": 54, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_errors{cadence_service=\"cadence_frontend\"}[2m]))", + "interval": "", + "legendFormat": "Internal server error", + "refId": "A" + }, + { + "expr": "sum(rate(cadence_errors_bad_request{cadence_service=\"cadence_frontend\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Bad Requests error", + "refId": "B" + }, + { + "expr": "sum(rate(cadence_errors_query_failed{cadence_service=\"cadence_frontend\"}[2m]))", + "interval": "", + "legendFormat": "QueryFailed error", + "refId": "C" + }, + { + "expr": "sum(rate(cadence_errors_context_timeout{cadence_service=\"cadence_frontend\"}[2m]))", + "interval": "", + "legendFormat": "ContextTimeout error", + "refId": "D" + }, + { + "expr": "sum(rate(cadence_errors_entity_not_exists{cadence_service=\"cadence_frontend\"}[2m]))", + "interval": "", + "legendFormat": "EntityNotExists error", + "refId": "E" + }, + { + "expr": "sum(rate(cadence_errors_execution_already_started{cadence_service=\"cadence_frontend\"}[2m]))", + "interval": "", + "legendFormat": "WorkflowAlreadyStarted error", + "refId": "F" + }, + { + "expr": "sum(rate(cadence_errors_workflow_execution_already_completed{cadence_service=\"cadence_frontend\"}[2m]))", + "interval": "", + "legendFormat": "WorkflowAlreadyCompleted", + "refId": "G" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "API errors per second(breakdown per operation)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 58 + }, + "hiddenSeries": false, + "id": 55, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\", operation=~\"SignalWithStartworkflowExecution|SignalWorkflowExecution|StartWorkflowExecution|TerminateWorkflowExecution|ResetWorkflowExecution|RequestCancelWorkflowExecution|ListWorkflowExecutions\"}[2m])) by (domain, operation)", + "hide": false, + "interval": "", + "legendFormat": "Domain: {{domain}}- Operation: {{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "WorkflowClient API per seconds by domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 66 + }, + "hiddenSeries": false, + "id": 56, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\", operation!~\"CountWorkflowExecutions|GetWorkflowExecutionHistory|ListClosedWorkflowExecutions|ListOpenWorkflowExecutions|ListWorkflowExecutions|PollForActivityTask|PollForDecisionTask|QueryWorkflow\"}) by (domain, operation)", + "hide": false, + "interval": "", + "legendFormat": "Domain: {{domain}}- Operation: {{operation}}", + "refId": "B" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Regular API Per Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 23, + "x": 0, + "y": 74 + }, + "id": 57, + "interval": null, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.2.1", + "targets": [ + { + "expr": "sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\", operation=~\"SignalWithStartWorkflowExecution|SignalWorkflowxecution|StartWorkflowExecution|TerminateWorkflowExecution|ResetWorkflowExecution|RequestCancelWorkflowexecution|ListWorkflowExecutions\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "{{domain}}", + "refId": "B" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top Domains for Regular Requests", + "type": "bargauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 23, + "x": 0, + "y": 80 + }, + "id": 58, + "interval": null, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.2.1", + "targets": [ + { + "expr": "sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\", operation=~\"PollForDecisionTask|GetWorkflowExecutionHistory|RespondDecisionTaskCompleted\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "{{domain}}{{operation}}", + "refId": "B" + }, + { + "expr": "cadence_requests{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top Domains for Workflow Worker Requests", + "type": "bargauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 23, + "x": 0, + "y": 86 + }, + "id": 59, + "interval": null, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.2.1", + "targets": [ + { + "expr": "sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\", operation=~\"PollForActivityTask|RespondActivityTaskCompleted\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "{{domain}}{{operation}}", + "refId": "B" + }, + { + "expr": "cadence_requests{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top Domains for Activity Worker Requests", + "type": "bargauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 23, + "x": 0, + "y": 92 + }, + "id": 60, + "interval": null, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.2.1", + "targets": [ + { + "expr": "sum(cadence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "{{domain}}{{operation}}", + "refId": "B" + }, + { + "expr": "cadence_requests{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top Domains for All Requests", + "type": "bargauge" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 98 + }, + "id": 42, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 61, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(membership_changed_count{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=\"ShardController\"})", + "hide": false, + "interval": "", + "legendFormat": "Membership Changed Count", + "refId": "A" + }, + { + "expr": "sum(shard_closed_count{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=\"ShardController\"})", + "interval": "", + "legendFormat": "Shard Closed Count", + "refId": "B" + }, + { + "expr": "sum(sharditem_created_count{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=\"ShardController\"})", + "interval": "", + "legendFormat": "Shard Item Created Count", + "refId": "C" + }, + { + "expr": "sum(sharditem_removed_count{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=\"ShardController\"})", + "interval": "", + "legendFormat": "Shard Item Removed Count", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "History Shard Movements", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 62, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Membership Changed Count", + "refId": "A" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "History API per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "hiddenSeries": false, + "id": 63, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_errors{cadence_service=\"cadence_history\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "cadence errors: ", + "refId": "A" + }, + { + "expr": "sum(rate(cadence_errors_entity_not_exists{cadence_service=\"cadence_history\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Error: entity_not_exists", + "refId": "B" + }, + { + "expr": "sum(cadence_errors_execution_already_started{cadence_service=\"cadence_history\"})", + "hide": false, + "interval": "", + "legendFormat": "Error: execution_already_started", + "refId": "C" + }, + { + "expr": "sum(cadence_errors_workflow_id_exceeded_warn_limit{cadence_service=\"cadence_history\"})", + "hide": false, + "interval": "", + "legendFormat": "Error: workflow_id_exceeded_warn_limit", + "refId": "D" + }, + { + "expr": "sum(cadence_errors_workflow_execution_already_completed{cadence_service=\"cadence_history\"})", + "hide": false, + "interval": "", + "legendFormat": "Error: workflow_execution_already_completed", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "History API Errors per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 64, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_bucket{namespace=\"$namespace\", cadence_service=\"cadence_history\",operation!~\"PollMutableState\"}[2m]))))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Regular API Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "hiddenSeries": false, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(task_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=~\"TransferActiveTask.*\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer Tasks Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "hiddenSeries": false, + "id": 66, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(task_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=~\"TimerActiveTask.*\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Timer Tasks Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 67, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(task_requests_per_domain{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=~\"TransferActive.*\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer Tasks Per Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(task_requests_per_domain{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=~\"TimerActive.*\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Timer Tasks Per Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 69, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(rate(task_latency_per_domain_bucket{namespace=\"$namespace\",operation=~\"TransferActive.*\"}[2m])) by (le, domain))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer Task Latency Per Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(rate(task_latency_per_domain_bucket{namespace=\"$namespace\",operation=~\"TimerActive.*\"}[2m])) by (le, domain))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Timer Task Latency Per Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(rate(task_latency_per_domain_bucket{namespace=\"$namespace\",operation=~\"Transfer.*\"}[2m])) by (le, operation))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer Task Executing Latency By TaskType", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "hiddenSeries": false, + "id": 72, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(rate(task_latency_per_domain_bucket{namespace=\"$namespace\",operation=~\"Timer.*\"}[2m])) by (le, operation))", + "interval": "", + "legendFormat": "{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Timer Task Executing Latency By TaskType", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "hiddenSeries": false, + "id": 73, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(rate(task_latency_queue_bucket{namespace=\"$namespace\",operation=~\"Transfer.*\"}[2m])) by (le, operation))", + "hide": false, + "interval": "", + "legendFormat": "{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer Task Queue Latency By TaskType", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "hiddenSeries": false, + "id": 74, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(rate(task_latency_queue_bucket{namespace=\"$namespace\",operation=~\"Timer.*\"}[2m])) by (le, operation))", + "interval": "", + "legendFormat": "{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Timer Task Queue Latency By TaskType", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "hiddenSeries": false, + "id": 75, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(transfer_task_throttled_counter{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=~\"Transfer.*\"}) by (domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "sum(timer_task_throttled_counter{cadence_service=\"cadence_history\",namespace=\"$namespace\",operation=~\"Transfer.*\"}) by (domain)", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transfer/Timer task throttling by domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "hiddenSeries": false, + "id": 76, + "legend": { + "avg": false, + "current": false, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(history_size_bucket{namespace=\"$namespace\",domain!=\"all\"}) by (le, domain)) * 1000000000", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max History Size(in bytes) by domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "hiddenSeries": false, + "id": 77, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " sum(rate(history_count_sum{namespace=\"$namespace\",domain!=\"all\"}[2m])) by (le, domain) * 1000000000 / sum(rate(history_count_count{namespace=\"$namespace\",domain!=\"all\"}[2m])) by (le, domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Avg History Length by Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "hiddenSeries": false, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, sum(event_blob_size_bucket{namespace=\"$namespace\"}) by (le, domain)) * 1000000000", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max Event Blob Size by Domain", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "History", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 99 + }, + "id": 40, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 79, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests{cadence_service=\"cadence_matching\",namespace=\"$namespace\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Matching APIs per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "hiddenSeries": false, + "id": 80, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_errors_per_tl{cadence_service=\"cadence_matching\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Error: errors_per_tl", + "refId": "A" + }, + { + "expr": "sum(rate(cadence_errors_query_failed_per_tl{cadence_service=\"cadence_matching\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "Error: query_failed_per_tl", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Matching API Errors per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 81, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (rate(cadence_latency_per_tl_bucket{namespace=\"$namespace\", cadence_service=\"cadence_matching\",operation!~\"QueryWorkflow\"}[2m]))))", + "interval": "", + "legendFormat": "{{operation}}", + "refId": "A" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Matching Regular API Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "hiddenSeries": false, + "id": 82, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation, tasklist) (rate(cadence_latency_per_tl_bucket{namespace=\"$namespace\", cadence_service=\"cadence_matching\",operation=~\"PollFor.*\"}[2m]))))", + "interval": "", + "legendFormat": "{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Matching Long Poll API Latency ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation, tasklist, domain) (syncmatch_latency_per_tl_bucket{namespace=\"$namespace\", cadence_service=\"cadence_matching\"})))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sync Match Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 20 + }, + "hiddenSeries": false, + "id": 84, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation, tasklist, domain) (asyncmatch_latency_per_tl_bucket{namespace=\"$namespace\", cadence_service=\"cadence_matching\"})))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Async Match Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 85, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests_per_tl{cadence_service=\"cadence_matching\",namespace=\"$namespace\", operation=~\"Add.*\"}[2m])) by (operation,tasklist,domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "", + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "AddTasks per Second - domain, tasklist breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 86, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cadence_requests_per_tl{cadence_service=\"cadence_matching\",namespace=\"$namespace\",operation!~\"Add.*\"}[2m])) by (operation,tasklist,domain)", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Other API per Second - domain, tasklist breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Matching", + "type": "row" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 100 + }, + "id": 38, + "panels": [], + "title": "Persistence-Database", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 101 + }, + "hiddenSeries": false, + "id": 87, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(1 - ( sum(rate(persistence_errors{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m]))/\n sum(rate(persistence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m])) ) ) * 100", + "hide": false, + "interval": "", + "legendFormat": "cadence_frontend.persistence", + "refId": "A" + }, + { + "expr": "(1 - ( sum(rate(persistence_errors{cadence_service=\"cadence_matching\",namespace=\"$namespace\"}[2m]))/\n sum(rate(persistence_requests{cadence_service=\"cadence_matching\",namespace=\"$namespace\"}[2m])) ) ) * 100", + "interval": "", + "legendFormat": "cadence_matching.persistence", + "refId": "B" + }, + { + "expr": "(1 - ( sum(rate(persistence_errors{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m]))/\n sum(rate(persistence_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m])) ) ) * 100", + "interval": "", + "legendFormat": "cadence_history.persistence", + "refId": "C" + }, + { + "expr": "(1 - ( sum(rate(persistence_errors{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}[2m]))/\n sum(rate(persistence_requests{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}[2m])) ) ) * 100", + "interval": "", + "legendFormat": "cadence_worker.persistence", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Persistence-Database", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 101 + }, + "hiddenSeries": false, + "id": 88, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m]))", + "hide": false, + "interval": "", + "legendFormat": "cadence_frontend", + "refId": "A" + }, + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_matching\",namespace=\"$namespace\"}[2m]))", + "interval": "", + "legendFormat": "cadence_matching", + "refId": "B" + }, + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m]))", + "interval": "", + "legendFormat": "cadence_history", + "refId": "C" + }, + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}[2m]))", + "interval": "", + "legendFormat": "cadence_worker", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Persistence By Service TPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 109 + }, + "hiddenSeries": false, + "id": 89, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m])) by (le, operation)", + "hide": false, + "interval": "", + "legendFormat": "cadence_frontend-{{operation}}", + "refId": "A" + }, + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_matching\",namespace=\"$namespace\"}[2m])) by (le, operation)", + "interval": "", + "legendFormat": "cadence_matching-{{operation}}", + "refId": "B" + }, + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m])) by (le, operation)", + "interval": "", + "legendFormat": "cadence_history-{{operation}}", + "refId": "C" + }, + { + "expr": "sum(rate(persistence_requests{cadence_service=\"cadence_worker\",namespace=\"$namespace\"}[2m])) by (le, operation)", + "interval": "", + "legendFormat": "cadence_worker-{{operation}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Persistence By Operation TPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 109 + }, + "hiddenSeries": false, + "id": 90, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (persistence_latency_bucket{namespace=\"$namespace\", cadence_service=\"cadence_frontend\"}))) ", + "hide": false, + "interval": "", + "legendFormat": "cadence_frontend-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (persistence_latency_bucket{namespace=\"$namespace\", cadence_service=\"cadence_matching\"}))) ", + "interval": "", + "legendFormat": "cadence_matching-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (persistence_latency_bucket{namespace=\"$namespace\", cadence_service=\"cadence_history\"}))) ", + "interval": "", + "legendFormat": "cadence_history-{{operation}}", + "refId": "C" + }, + { + "expr": "histogram_quantile($quantiles, (sum by (le, operation) (persistence_latency_bucket{namespace=\"$namespace\", cadence_service=\"cadence_worker\"}))) ", + "interval": "", + "legendFormat": "cadence_worker-{{operation}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Persistence By Operation Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 117 + }, + "hiddenSeries": false, + "id": 91, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (le, operation) (persistence_errors{namespace=\"$namespace\", cadence_service=\"cadence_frontend\"})", + "hide": false, + "interval": "", + "legendFormat": "cadence_frontend-{{operation}}", + "refId": "A" + }, + { + "expr": "sum by (le, operation) (persistence_errors{namespace=\"$namespace\", cadence_service=\"cadence_matching\"})", + "interval": "", + "legendFormat": "cadence_matching-{{operation}}", + "refId": "B" + }, + { + "expr": "sum by (le, operation) (persistence_errors{namespace=\"$namespace\", cadence_service=\"cadence_history\"})", + "interval": "", + "legendFormat": "cadence_history-{{operation}}", + "refId": "C" + }, + { + "expr": "sum by (le, operation) (persistence_errors{namespace=\"$namespace\", cadence_service=\"cadence_worker\"})", + "interval": "", + "legendFormat": "cadence_worker-{{operation}}", + "refId": "D" + }, + { + "expr": "sum by (le, operation) (persistence_errors_condition_failed{namespace=\"$namespace\", cadence_service=\"cadence_matching\"})", + "interval": "", + "legendFormat": "matching-condition-failed", + "refId": "E" + }, + { + "expr": "sum by (le, operation) (persistence_errors_shard_ownership_lost{namespace=\"$namespace\", cadence_service=\"cadence_history\"})", + "interval": "", + "legendFormat": "history-shard-owneship-lost", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Persistence Error By Operation Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 125 + }, + "id": 36, + "panels": [], + "repeat": null, + "title": "Persistence-Kafka&ElasticSearch(Visibility)", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 126 + }, + "hiddenSeries": false, + "id": 92, + "legend": { + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(1 - ( sum(rate(elasticsearch_errors{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m]))/\n sum(rate(elasticsearch_requests{cadence_service=\"cadence_frontend\",namespace=\"$namespace\"}[2m])) ) ) * 100", + "hide": false, + "interval": "", + "legendFormat": "cadence_frontend-{{operation}}", + "refId": "A" + }, + { + "expr": "(1 - ( sum(rate(elasticsearch_errors{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m]))/\n sum(rate(elasticsearch_requests{cadence_service=\"cadence_history\",namespace=\"$namespace\"}[2m])) ) ) * 100", + "interval": "", + "legendFormat": "cadence_history-{{operation}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Persistence-Kafka&ElasticSearch(Visibility)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 31, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "cadence-sta", + "value": "cadence-sta" + }, + "datasource": "Prometheus", + "definition": "label_values(cadence_requests,namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(cadence_requests,namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "all", + "value": "all" + }, + "datasource": "Prometheus", + "definition": "label_values(activity_end_to_end_latency_sum{namespace=\"$namespace\"}, domain)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Domain", + "multi": true, + "name": "domain", + "options": [], + "query": { + "query": "label_values(activity_end_to_end_latency_sum{namespace=\"$namespace\"}, domain)", + "refId": "Prometheus-domain-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "0.99", + "value": "0.99" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Quantiles", + "multi": false, + "name": "quantiles", + "options": [ + { + "selected": false, + "text": "0.5", + "value": "0.5" + }, + { + "selected": false, + "text": "0.95", + "value": "0.95" + }, + { + "selected": true, + "text": "0.99", + "value": "0.99" + } + ], + "query": "0.5, 0.95, 0.99", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Cadence-Server-Basic", + "uid": "1B0efRyGz", + "version": 2 +} \ No newline at end of file diff --git a/docker/buildkite/grafana/provisioning/dashboards/default.yaml b/docker/buildkite/grafana/provisioning/dashboards/default.yaml new file mode 100644 index 00000000000..997d1c3969f --- /dev/null +++ b/docker/buildkite/grafana/provisioning/dashboards/default.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +providers: + - name: Cadence-Basic # A uniquely identifiable name for the provider + folder: Cadence + type: file + options: + path: + /etc/grafana/provisioning/dashboards/ diff --git a/docker/buildkite/grafana/provisioning/datasources/default.yaml b/docker/buildkite/grafana/provisioning/datasources/default.yaml new file mode 100644 index 00000000000..d5ff89b5bfe --- /dev/null +++ b/docker/buildkite/grafana/provisioning/datasources/default.yaml @@ -0,0 +1,6 @@ +datasources: + - name: Prometheus + type: prometheus + url: http://prometheus:9090 + version: 1 + editable: true diff --git a/docker/buildkite/prometheus/.gitignore b/docker/buildkite/prometheus/.gitignore new file mode 100644 index 00000000000..397b4a7624e --- /dev/null +++ b/docker/buildkite/prometheus/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/docker/buildkite/prometheus/matching_simulation_prometheus.yml b/docker/buildkite/prometheus/matching_simulation_prometheus.yml new file mode 100644 index 00000000000..c1cc541b1a8 --- /dev/null +++ b/docker/buildkite/prometheus/matching_simulation_prometheus.yml @@ -0,0 +1,14 @@ +global: + scrape_interval: 5s + external_labels: + monitor: 'cadence-monitor' + query_log_file: /etc/prometheus/query.log + scrape_failure_log_file: /etc/prometheus/scrape.log +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: # addresses to scrape + - 'cadence:8306' + - 'cadence:8307' + - 'cadence:8308' + - 'cadence:8309' diff --git a/host/matching_simulation_test.go b/host/matching_simulation_test.go index 6fa2280f21b..12b1f99f7bc 100644 --- a/host/matching_simulation_test.go +++ b/host/matching_simulation_test.go @@ -185,6 +185,8 @@ func (s *MatchingSimulationSuite) SetupTest() { } func (s *MatchingSimulationSuite) TearDownSuite() { + // Sleep for a while to ensure all metrics are emitted/scraped by prometheus + time.Sleep(5 * time.Second) s.tearDownSuite() } diff --git a/host/onebox.go b/host/onebox.go index be72d1e0686..ebe2ea5cfa6 100644 --- a/host/onebox.go +++ b/host/onebox.go @@ -29,6 +29,7 @@ import ( "github.com/pborman/uuid" "github.com/uber-go/tally" + "github.com/uber-go/tally/prometheus" apiv1 "github.com/uber/cadence-idl/go/proto/api/v1" cwsc "go.uber.org/cadence/.gen/go/cadence/workflowserviceclient" "go.uber.org/cadence/compatibility" @@ -526,6 +527,31 @@ func (c *cadenceImpl) MatchingPProfPorts() []int { return ports } +func (c *cadenceImpl) MatchingPrometheusPorts() []int { + var ports []int + var startPort int + switch c.clusterNo { + case 0: + startPort = 7306 + case 1: + startPort = 8306 + case 2: + startPort = 9306 + case 3: + startPort = 10306 + default: + startPort = 7306 + } + + for i := 0; i < c.matchingConfig.NumMatchingHosts; i++ { + port := startPort + i + ports = append(ports, port) + } + + c.logger.Info("Matching prometheus ports", tag.Value(ports)) + return ports +} + func (c *cadenceImpl) WorkerServiceHost() membership.HostInfo { var tchan uint16 switch c.clusterNo { @@ -740,6 +766,7 @@ func (c *cadenceImpl) startHistory(hosts map[string][]membership.HostInfo, start func (c *cadenceImpl) startMatching(hosts map[string][]membership.HostInfo, startWG *sync.WaitGroup) { pprofPorts := c.MatchingPProfPorts() + promPorts := c.MatchingPrometheusPorts() for i, hostport := range c.MatchingHosts() { hostport.Identity() matchingHost := fmt.Sprintf("matching-host-%d:%s", i, hostport.Identity()) @@ -749,7 +776,13 @@ func (c *cadenceImpl) startMatching(hosts map[string][]membership.HostInfo, star params.ThrottledLogger = c.logger params.TimeSource = c.timeSource params.PProfInitializer = newPProfInitializerImpl(c.logger, pprofPorts[i]) - params.MetricScope = tally.NewTestScope(service.Matching, map[string]string{"matching-host": matchingHost}) + metricsCfg := config.Metrics{ + Prometheus: &prometheus.Configuration{ + TimerType: "histogram", + ListenAddress: fmt.Sprintf(":%d", promPorts[i]), + }, + } + params.MetricScope = metricsCfg.NewScope(c.logger, service.Matching) params.MetricsClient = metrics.NewClient(params.MetricScope, service.GetMetricsServiceIdx(params.Name, c.logger)) params.RPCFactory = c.newRPCFactory(service.Matching, hostport, params.MetricsClient) params.MembershipResolver = newMembershipResolver(params.Name, hosts, hostport) diff --git a/scripts/run_matching_simulator.sh b/scripts/run_matching_simulator.sh index d6d30c19d6b..1b708974147 100755 --- a/scripts/run_matching_simulator.sh +++ b/scripts/run_matching_simulator.sh @@ -22,7 +22,8 @@ docker-compose -f docker/buildkite/docker-compose-local-matching-simulation.yml echo "Running the test $testCase" docker-compose \ -f docker/buildkite/docker-compose-local-matching-simulation.yml \ - run -e MATCHING_SIMULATION_CONFIG=$testCfg --rm --remove-orphans matching-simulator \ + run -e MATCHING_SIMULATION_CONFIG=$testCfg --rm --remove-orphans --service-ports --use-aliases \ + matching-simulator \ | grep -a --line-buffered "Matching New Event" \ | sed "s/Matching New Event: //" \ | jq . > "$eventLogsFile" @@ -243,4 +244,5 @@ cat "$eventLogsFile"\ echo "End of summary" | tee -a $testSummaryFile printf "\nResults are saved in $testSummaryFile\n" -printf "For further analysis, please check $eventLogsFile via jq queries\n" +printf "For further ad-hoc analysis, please check $eventLogsFile via jq queries\n" +printf "Visit http://localhost:3000/ to view Cadence Matching grafana dashboard\n"