Skip to content

Commit f717b73

Browse files
authored
Merge pull request #2156 from sthaha/feat-track-terminated
feat(monitor): track terminated processes
2 parents (d700f89 + 5e91e0f), commit f717b73

File tree

8 files changed, with 878 additions and 40 deletions.

docs/metrics/metrics.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
```diff
@@ -138,6 +138,7 @@ These metrics provide energy and power information for individual processes.
 - `comm`
 - `exe`
 - `type`
+- `state`
 - `container_id`
 - `vm_id`
 - `zone`
```
```diff
@@ -167,6 +168,7 @@ These metrics provide energy and power information for individual processes.
 - `comm`
 - `exe`
 - `type`
+- `state`
 - `container_id`
 - `vm_id`
 - `zone`
```

internal/exporter/prometheus/collector/power_collector.go

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -122,8 +122,8 @@ func NewPowerCollector(monitor PowerDataProvider, nodeName string, logger *slog.
122122
"CPU usage ratio of a node (value between 0.0 and 1.0)",
123123
nil, prometheus.Labels{nodeNameLabel: nodeName}),
124124

125-
processCPUJoulesDescriptor: joulesDesc("process", "cpu", nodeName, []string{"pid", "comm", "exe", "type", cntrID, vmID, zone}),
126-
processCPUWattsDescriptor: wattsDesc("process", "cpu", nodeName, []string{"pid", "comm", "exe", "type", cntrID, vmID, zone}),
125+
processCPUJoulesDescriptor: joulesDesc("process", "cpu", nodeName, []string{"pid", "comm", "exe", "type", "state", cntrID, vmID, zone}),
126+
processCPUWattsDescriptor: wattsDesc("process", "cpu", nodeName, []string{"pid", "comm", "exe", "type", "state", cntrID, vmID, zone}),
127127
processCPUTimeDescriptor: timeDesc("process", "cpu", nodeName, []string{"pid", "comm", "exe", "type", cntrID, vmID}),
128128

129129
containerCPUJoulesDescriptor: joulesDesc("container", "cpu", nodeName, []string{cntrID, "container_name", "runtime", zone, podID}),
@@ -206,7 +206,9 @@ func (c *PowerCollector) Collect(ch chan<- prometheus.Metric) {
206206
}
207207

208208
c.collectNodeMetrics(ch, snapshot.Node)
209-
c.collectProcessMetrics(ch, snapshot.Processes)
209+
c.collectProcessMetrics(ch, "running", snapshot.Processes)
210+
c.collectProcessMetrics(ch, "terminated", snapshot.TerminatedProcesses)
211+
210212
c.collectContainerMetrics(ch, snapshot.Containers)
211213
c.collectVMMetrics(ch, snapshot.VirtualMachines)
212214
c.collectPodMetrics(ch, snapshot.Pods)
@@ -272,9 +274,9 @@ func (c *PowerCollector) collectNodeMetrics(ch chan<- prometheus.Metric, node *m
272274
}
273275

274276
// collectProcessMetrics collects process-level power metrics
275-
func (c *PowerCollector) collectProcessMetrics(ch chan<- prometheus.Metric, processes monitor.Processes) {
277+
func (c *PowerCollector) collectProcessMetrics(ch chan<- prometheus.Metric, state string, processes monitor.Processes) {
276278
if len(processes) == 0 {
277-
c.logger.Debug("No processes to export metrics for")
279+
c.logger.Debug("No processes to export metrics", "state", state)
278280
return
279281
}
280282

@@ -296,7 +298,7 @@ func (c *PowerCollector) collectProcessMetrics(ch chan<- prometheus.Metric, proc
296298
c.processCPUJoulesDescriptor,
297299
prometheus.CounterValue,
298300
usage.EnergyTotal.Joules(),
299-
pidStr, proc.Comm, proc.Exe, string(proc.Type),
301+
pidStr, proc.Comm, proc.Exe, string(proc.Type), state,
300302
proc.ContainerID, proc.VirtualMachineID,
301303
zoneName,
302304
)
@@ -305,7 +307,7 @@ func (c *PowerCollector) collectProcessMetrics(ch chan<- prometheus.Metric, proc
305307
c.processCPUWattsDescriptor,
306308
prometheus.GaugeValue,
307309
usage.Power.Watts(),
308-
pidStr, proc.Comm, proc.Exe, string(proc.Type),
310+
pidStr, proc.Comm, proc.Exe, string(proc.Type), state,
309311
proc.ContainerID, proc.VirtualMachineID,
310312
zoneName,
311313
)

0 commit comments

Comments
 (0)