File tree Expand file tree Collapse file tree 3 files changed +4
-4
lines changed Expand file tree Collapse file tree 3 files changed +4
-4
lines changed Original file line number Diff line number Diff line change 7575 },
7676 "targets" : [
7777 {
78- "expr" : " worker_executor_active_tasks_total " ,
78+ "expr" : " worker_executor_active_tasks " ,
7979 "refId" : " A"
8080 }
8181 ],
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ groups:
1111 description : " Worker task error rate is {{ $value }} errors per minute, exceeding threshold"
1212 runbook : " Check worker logs for task failures"
1313 - alert : LongRunningTask
14- expr : histogram_quantile(0.95, sum(rate(worker_executor_task_duration_seconds_bucket[5m]))) > 30
14+ expr : histogram_quantile(0.95, sum(rate(worker_executor_task_duration_seconds_bucket[5m])) by (le) ) > 30
1515 for : 10m
1616 labels :
1717 severity : warning
@@ -20,7 +20,7 @@ groups:
2020 description : " 95th percentile task duration is {{ $value }}s, above 30s threshold"
2121 runbook : " Review task performance and optimize if needed"
2222 - alert : HighActiveTasks
23- expr : worker_executor_active_tasks_total > 10
23+ expr : worker_executor_active_tasks > 10
2424 for : 5m
2525 labels :
2626 severity : warning
Original file line number Diff line number Diff line change @@ -25,7 +25,7 @@ func newMetrics() *metrics {
2525 activeTasksCounter : promauto .NewGauge (prometheus.GaugeOpts {
2626 Namespace : "worker" ,
2727 Subsystem : "executor" ,
28- Name : "active_tasks_total " ,
28+ Name : "active_tasks " ,
2929 }),
3030 taskResult : promauto .NewCounterVec (prometheus.CounterOpts {
3131 Namespace : "worker" ,
You can’t perform that action at this time.
0 commit comments