Skip to content

Commit 8e72870

Browse files
committed
fix(tests/cpu-monitor): use psutil.Process for threads to fix errors
The previous fix didn't work because the read can also return ProcessLookupError. There is also the issue of pid recycling. For this reason, use a single psutil.Process to verify the process still exists, and catch any exception just in case. Signed-off-by: Riccardo Mancini <[email protected]>
1 parent d974044 commit 8e72870

File tree

2 files changed

+20
-22
lines changed

2 files changed

+20
-22
lines changed

tests/framework/utils.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import typing
1515
from collections import defaultdict, namedtuple
1616
from contextlib import contextmanager
17-
from pathlib import Path
1817
from typing import Dict
1918

2019
import psutil
@@ -56,50 +55,45 @@ def set_cpu_affinity(pid: int, cpulist: list) -> list:
5655
return psutil.Process(pid).cpu_affinity(real_cpulist)
5756

5857

59-
def get_thread_name(pid: int, tid: int) -> str:
60-
"""Return thread name from pid and tid pair."""
61-
try:
62-
return (
63-
Path("/proc", str(pid), "task", str(tid), "comm").read_text("utf-8").strip()
64-
)
65-
except FileNotFoundError as exc:
66-
raise psutil.NoSuchProcess(tid) from exc
67-
68-
6958
CpuTimes = namedtuple("CpuTimes", ["user", "system"])
7059

7160

72-
def get_cpu_times(pid: int) -> Dict[str, CpuTimes]:
61+
def get_cpu_times(process: psutil.Process) -> Dict[str, CpuTimes]:
7362
"""Return a dict mapping thread name to CPU usage (in seconds) since start."""
63+
# We're consciously ignoring whatever error is returned by psutil and returning
64+
# empty {} as result in case of any error retrieving the process threads
65+
# information
66+
# pylint: disable=locally-disabled, broad-exception-caught
67+
7468
threads = []
7569
try:
76-
threads = psutil.Process(pid).threads()
77-
except psutil.NoSuchProcess as exc:
78-
logging.warning("Process %d does not exist", pid, exc_info=exc)
70+
threads = process.threads()
71+
except Exception as exc:
72+
logging.warning("Process %d does not exist", process.pid, exc_info=exc)
7973
return {}
8074

8175
cpu_times = {}
8276
for thread in threads:
8377
try:
84-
thread_name = get_thread_name(pid, thread.id)
78+
thread_name = psutil.Process(thread.id).name()
8579
cpu_times[thread_name] = CpuTimes(thread.user_time, thread.system_time)
86-
except psutil.NoSuchProcess as exc:
80+
except Exception as exc:
8781
logging.warning("Thread %d no longer exists", thread.id, exc_info=exc)
8882
continue
8983

9084
return cpu_times
9185

9286

9387
def get_cpu_utilization(
94-
pid: int,
88+
process: psutil.Process,
9589
interval: int = 1,
9690
split_user_system: bool = False,
9791
) -> Dict[str, float | CpuTimes]:
9892
"""Return current process per thread CPU utilization over the interval (seconds)."""
9993
cpu_utilization = {}
100-
cpu_times_before = get_cpu_times(pid)
94+
cpu_times_before = get_cpu_times(process)
10195
time.sleep(interval)
102-
cpu_times_after = get_cpu_times(pid)
96+
cpu_times_after = get_cpu_times(process)
10397
threads = set(cpu_times_before.keys()) & set(cpu_times_after.keys())
10498
for thread_name in threads:
10599
before = cpu_times_before[thread_name]
@@ -125,8 +119,9 @@ def track_cpu_utilization(
125119
time.sleep(omit)
126120

127121
cpu_utilization = defaultdict(list)
122+
process = psutil.Process(pid)
128123
for _ in range(iterations):
129-
current_cpu_utilization = get_cpu_utilization(pid)
124+
current_cpu_utilization = get_cpu_utilization(process)
130125
assert len(current_cpu_utilization) > 0
131126

132127
for thread_name, value in current_cpu_utilization.items():

tests/host_tools/cpu_load.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import time
55
from threading import Thread
66

7+
import psutil
8+
79
from framework import utils
810

911

@@ -61,8 +63,9 @@ def run(self):
6163
6264
It is up to the caller to check the queue.
6365
"""
66+
process = psutil.Process(self._process_pid)
6467
while not self._should_stop:
65-
utilization = utils.get_cpu_utilization(self._process_pid)
68+
utilization = utils.get_cpu_utilization(process)
6669

6770
try:
6871
fc_thread_util = utilization["firecracker"]

0 commit comments

Comments
 (0)