From 69c401b809180bcf5197aba23eb23070e85f27cb Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 21 Jun 2024 11:52:22 -0700 Subject: [PATCH 1/7] dentry: add "ls" corelens module This simple module just lists the children of a dentry, much like ls would list the contents of a directory. Of course, not all children of a dentry are real files (as they may be negative), and not all real contents of a directory are necessarily cached as a child dentry. So it's not an exact comparison. But it's close enough. Signed-off-by: Stephen Brennan --- drgn_tools/dentry.py | 71 +++++++++++++++++++++++++++++++++++++++++++- tests/test_dentry.py | 4 +++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/drgn_tools/dentry.py b/drgn_tools/dentry.py index e3aab0d0..1a79d977 100644 --- a/drgn_tools/dentry.py +++ b/drgn_tools/dentry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Oracle and/or its affiliates. +# Copyright (c) 2024, Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ Helpers for dentries. @@ -12,6 +12,8 @@ import drgn from drgn import Object from drgn import Program +from drgn.helpers.linux.fs import path_lookup +from drgn.helpers.linux.list import hlist_for_each_entry from drgn.helpers.linux.list import list_for_each_entry from drgn_tools.corelens import CorelensModule @@ -24,6 +26,27 @@ MNT_INTERNAL = 0x4000 +def dentry_for_each_child(dentry: Object) -> Iterator[Object]: + """ + Iterate over every child of a dentry + """ + # Commit da549bdd15c29 ("dentry: switch the lists of children to hlist") + # changes the list names and types. Try the older names first since all UEK + # versions have the older names. 
+ try: + return list_for_each_entry( + "struct dentry", + dentry.d_subdirs.address_of_(), + "d_child", + ) + except AttributeError: + return hlist_for_each_entry( + "struct dentry", + dentry.d_children.address_of_(), + "d_sib", + ) + + def sb_first_mount_point(sb: Object) -> Optional[Object]: """ Return the first mountpoint of the superblock @@ -358,6 +381,52 @@ def __file_type(mode: Object) -> str: return "UNKN" +def ls(prog: Program, directory: str, count: bool = False) -> None: + """ + Print dentry children, like the ls command + :param directory: directory to print children of + :param count: when true, only print counts (not the full contents) + """ + dentries = dentry_for_each_child(path_lookup(prog, directory).dentry) + + pos = neg = 0 + for i, dentry in enumerate(dentries): + path = dentry_path_any_mount(dentry).decode() + if dentry_is_negative(dentry): + neg += 1 + else: + pos += 1 + if not count: + print(f"{i:05d} {path}") + print(f"{pos} positive, {neg} negative dentries") + + +class Ls(CorelensModule): + """List or count child dentries given a file path""" + + name = "ls" + + # This module shouldn't run for corelens reports, because it has a required + # argument. It's quite useful to run it interactively though. 
+ run_when = "never" + + def add_args(self, parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "directory", + type=str, + help="directory to list", + ) + parser.add_argument( + "--count", + "-c", + action="store_true", + help="only print counts, rather than every element", + ) + + def run(self, prog: Program, args: argparse.Namespace) -> None: + ls(prog, args.directory, count=args.count) + + class DentryCache(CorelensModule): """List dentries from the dentry hash table""" diff --git a/tests/test_dentry.py b/tests/test_dentry.py index 6ef5ae77..90fa413a 100644 --- a/tests/test_dentry.py +++ b/tests/test_dentry.py @@ -14,3 +14,7 @@ def test_for_each_dentry_in_hashtable(prog): def test_list_dentries_in_hashtable(prog): dentry.list_dentries_in_hashtable(prog, LIMIT) + + +def test_ls(prog): + dentry.ls(prog, "/") From 6839391dda1289bdef04e914aa2994dd53f81997 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 21 Jun 2024 11:54:20 -0700 Subject: [PATCH 2/7] bt: add indent argument This allows the bt() function to be used within other helpers without breaking up the indentation of the output. Signed-off-by: Stephen Brennan --- drgn_tools/bt.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drgn_tools/bt.py b/drgn_tools/bt.py index f1af80f0..0a8d2cb7 100644 --- a/drgn_tools/bt.py +++ b/drgn_tools/bt.py @@ -241,7 +241,7 @@ def bt_frames( return expand_frames(stack_trace) -def print_task_header(task: drgn.Object) -> None: +def print_task_header(task: drgn.Object, indent: int = 0) -> None: """ Given a task struct, print the header line of the stack trace. """ @@ -253,8 +253,9 @@ def print_task_header(task: drgn.Object) -> None: cpu_note = "" if cpu_curr(task.prog_, cpu) == task: cpu_note = "!" 
+ pfx = " " * indent print( - f"PID: {pid:<7d} TASK: {taskp:x} [{st}] CPU: {cpu}{cpu_note}" + f"{pfx}PID: {pid:<7d} TASK: {taskp:x} [{st}] CPU: {cpu}{cpu_note}" f' COMMAND: "{comm}"' ) @@ -265,6 +266,7 @@ def print_frames( show_vars: bool = False, show_absent: bool = False, start_idx: int = 0, + indent: int = 0, ) -> None: """ Print stack frames using the drgn-tools (crash-like) format @@ -274,8 +276,10 @@ def print_frames( :param trace: The stack trace or list of frames to print :param show_vars: True if you want to show variables :param show_absent: True if you further want to show absent variables - :start_idx: Where to start counting the frame indices from + :param start_idx: Where to start counting the frame indices from + :param indent: How many spaces to indent the output """ + pfx = " " * indent for i, frame in enumerate(trace): sp = frame.sp # drgn 0.0.22 intr = "!" if frame.interrupted else " " @@ -285,7 +289,7 @@ def print_frames( pc = "???" name = frame_name(prog, frame) idx = start_idx + i - out_line = f"{intr}#{idx:2d} [{sp:x}] {name} at {pc}" + out_line = f"{pfx}{intr}#{idx:2d} [{sp:x}] {name} at {pc}" try: file_, line, col = frame.source() out_line += f" {file_}:{line}:{col}" @@ -306,7 +310,8 @@ def print_frames( # This formats the registers in three columns. 
for j in range(0, len(regnames), 3): print( - " " * 5 + pfx + + " " * 5 + " ".join( f"{reg.upper():>3s}: {registers[reg]:016x}" for reg in regnames[j : j + 3] @@ -336,13 +341,14 @@ def print_frames( if val.absent_ and not show_absent: continue val_str = val.format_(dereference=False).replace("\n", "\n ") - print(" " * 5 + f"{local} = {val_str}") + print(pfx + " " * 5 + f"{local} = {val_str}") def print_traces( traces: t.List[drgn.StackTrace], show_vars: bool = False, show_absent: bool = False, + indent: int = 0, ) -> None: """ Given a list of stack traces, print them in the crash-like format @@ -357,13 +363,15 @@ def print_traces( idx = 0 prog = traces[0].prog for trace_idx, trace in enumerate(traces): - print_frames(prog, trace, show_vars=show_vars, start_idx=idx) + print_frames( + prog, trace, show_vars=show_vars, start_idx=idx, indent=indent + ) idx += len(trace) # Ok, this is the end of the loop over each frame within the trace. if trace_idx < len(traces) - 1: # But there is still another segment - print(" -- continuing to previous stack -- ") + print(" " * indent + " -- continuing to previous stack -- ") def bt( @@ -373,6 +381,7 @@ def bt( show_vars: bool = False, show_absent: bool = False, retframes: bool = False, + indent: int = 0, ) -> t.Optional[t.List[drgn.StackFrame]]: """ Format a crash-like stack trace. @@ -418,6 +427,7 @@ def bt( to include absent variables. Normally there's no reason to see this, since absent variables have no information. :param retframes: When true, returns a list of stack frames. + :param indent: Number of spaces to indent all output lines :returns: A list of the stack frames which were printed. This can be useful for accessing the variables out of the frames interactively. 
If you're writing a script that needs to access frames, you may want to consider the @@ -431,13 +441,15 @@ def bt( "struct task_struct *", ): state = task_state_to_char(task) - print_task_header(task) + print_task_header(task, indent=indent) if state in ("Z", "X"): print(f"Task is in state: {state} - cannot unwind") return [] traces = expand_traces(task.prog_.stack_trace(task)) - print_traces(traces, show_vars=show_vars, show_absent=show_absent) + print_traces( + traces, show_vars=show_vars, show_absent=show_absent, indent=indent + ) frames = None if retframes: frames = [] From 5d3863865a27b012ac16b6cda63aa36ec707a649 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 21 Jun 2024 11:55:04 -0700 Subject: [PATCH 3/7] bt: skip idle tasks in print_online_bt() This just makes sense, idle tasks aren't terribly interesting. That said, make this a kwarg just so users can still look at them if they want. Signed-off-by: Stephen Brennan --- drgn_tools/bt.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drgn_tools/bt.py b/drgn_tools/bt.py index 0a8d2cb7..e232e0d7 100644 --- a/drgn_tools/bt.py +++ b/drgn_tools/bt.py @@ -570,13 +570,27 @@ def bt_has( return bt_has_any(prog, [funcname], task) -def print_online_bt(prog: Program, **kwargs: t.Any) -> None: +def print_online_bt( + prog: Program, skip_idle: bool = True, **kwargs: t.Any +) -> None: """ Prints the stack trace of all on-CPU tasks :kwargs: passed to bt() to control backtrace format """ for cpu in for_each_online_cpu(prog): + task = cpu_curr(prog, cpu) + if skip_idle and task.comm.string_().decode() == f"swapper/{cpu}": + # Just because it's the swapper task, does not mean it is idling. + # Check the symbol at the top of the stack to ensure it's the + # architecture idle function. 
+ trace = prog.stack_trace(task) + try: + sym = trace[0].symbol().name + if sym in ("intel_idle",): + continue + except (IndexError, LookupError): + pass bt(prog, cpu=cpu, **kwargs) print() From a5db85db3708fddbce336804c2c2d3deaa732742 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 21 Jun 2024 11:59:50 -0700 Subject: [PATCH 4/7] Add fsnotify corelens module This module aids in understanding the state of the fsnotify subsystem: what inodes / superblocks / vfsmounts are being watched, who is waiting for events, and in the case of fanotify, who is waiting on an access response from userspace. Signed-off-by: Stephen Brennan --- drgn_tools/fsnotify.py | 388 +++++++++++++++++++++++++++++++++++++++++ drgn_tools/util.py | 14 ++ tests/test_fsnotify.py | 7 + 3 files changed, 409 insertions(+) create mode 100644 drgn_tools/fsnotify.py create mode 100644 tests/test_fsnotify.py diff --git a/drgn_tools/fsnotify.py b/drgn_tools/fsnotify.py new file mode 100644 index 00000000..5cd424d9 --- /dev/null +++ b/drgn_tools/fsnotify.py @@ -0,0 +1,388 @@ +# Copyright (c) 2024, Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +""" +Helpers for diagnosing issues with dnotify, inotify, fanotify: the "fsnotify" +subsystem. 
+""" +import argparse +from typing import Dict +from typing import Iterator +from typing import Tuple + +from drgn import cast +from drgn import container_of +from drgn import NULL +from drgn import Object +from drgn import Program +from drgn.helpers.common.format import decode_flags +from drgn.helpers.common.format import escape_ascii_string +from drgn.helpers.linux.fs import for_each_file +from drgn.helpers.linux.list import hlist_for_each_entry +from drgn.helpers.linux.list import list_count_nodes +from drgn.helpers.linux.list import list_for_each_entry +from drgn.helpers.linux.pid import for_each_task +from drgn.helpers.linux.slab import slab_object_info +from drgn.helpers.linux.wait import waitqueue_active +from drgn.helpers.linux.wait import waitqueue_for_each_entry + +from drgn_tools.bt import bt +from drgn_tools.corelens import CorelensModule +from drgn_tools.dentry import dentry_path_any_mount +from drgn_tools.dentry import sb_first_mount_point +from drgn_tools.task import is_group_leader +from drgn_tools.util import type_has_member + +FSNOTIFY_FLAGS = { + # Prefixed by "FS_" in the code: include/linux/fsnotify_backend.h + # The prefix is removed for nicer printing. 
+ "ACCESS": 0x00000001, # File was accessed + "MODIFY": 0x00000002, # File was modified + "ATTRIB": 0x00000004, # Metadata changed + "CLOSE_WRITE": 0x00000008, # Writtable file was closed + "CLOSE_NOWRITE": 0x00000010, # Unwrittable file closed + "OPEN": 0x00000020, # File was opened + "MOVED_FROM": 0x00000040, # File was moved from X + "MOVED_TO": 0x00000080, # File was moved to Y + "CREATE": 0x00000100, # Subfile was created + "DELETE": 0x00000200, # Subfile was deleted + "DELETE_SELF": 0x00000400, # Self was deleted + "MOVE_SELF": 0x00000800, # Self was moved + "OPEN_EXEC": 0x00001000, # File was opened for exec + "UNMOUNT": 0x00002000, # inode on umount fs + "Q_OVERFLOW": 0x00004000, # Event queued overflowed + "ERROR": 0x00008000, # Filesystem Error (fanotify) + "OPEN_PERM": 0x00010000, # open event in an permission hook + "ACCESS_PERM": 0x00020000, # access event in a permissions hook + "OPEN_EXEC_PERM": 0x00040000, # open/exec event in a permission hook + "EVENT_ON_CHILD": 0x08000000, + "RENAME": 0x10000000, # File was renamed + "DN_MULTISHOT": 0x20000000, # dnotify multishot + "ISDIR": 0x40000000, # event occurred against dir +} + + +def fsnotify_group_for_each_mark(group: Object) -> Iterator[Object]: + """ + Iterate over all fsnotify marks for a given group. + :param group: ``struct fsnotify_group *`` + :returns: iterator of ``struct fsnotify_mark *`` + """ + return list_for_each_entry( + "struct fsnotify_mark", group.marks_list.address_of_(), "g_list" + ) + + +def fsnotify_mark_object(mark: Object) -> Tuple[str, Object]: + """ + For an fsnotify mark, determine what kind of object and return it + + Fsnotify marks can be applied to an inode, superblock, or vfsmount. Identify + which kind of object the mark is applied to, and return that along with a + pointer to the object. If we don't understand the object type, then we + return ("unknown", NULL). 
+ + :param mark: ``struct fsnotify-mark *`` + :returns: (object type, object pointer) + """ + conn = mark.connector + prog = mark.prog_ + + if not hasattr(conn, "type"): + # Commit d6f7b98bc8147 ("fsnotify: use type id to identify connector + # object type") adds a type field to the connector. Before this, type + # was expressed as bits in the flag field. The bit numbers were + # preprocessor definitions, let's just hardcode them here. + if conn.flags & 0x1: + return "inode", conn.inode + elif conn.flags & 0x2: + return "vfsmount", conn.vfsmount + else: + return "unknown", NULL(prog, "void *") + # See fsnotify_conn_{inode,mount,sb} in fs/notify/fsnotify.h + if conn.type == prog.constant("FSNOTIFY_OBJ_TYPE_INODE"): + # Prior to 36f10f55ff1d2 ("fsnotify: let connector point to an abstract + # object"), there were direct pointers in the connector. + if hasattr(conn, "inode"): + return "inode", conn.inode + return "inode", container_of( + conn.obj, "struct inode", "i_fsnotify_marks" + ) + elif conn.type == prog.constant("FSNOTIFY_OBJ_TYPE_VFSMOUNT"): + # Prior to 36f10f55ff1d2 ("fsnotify: let connector point to an abstract + # object"), there were direct pointers in the connector. + if hasattr(conn, "vfsmount"): + return "vfsmount", conn.vfsmount + return "vfsmount", container_of( + conn.obj, "struct mount", "mnt_fsnotify_marks" + ) + elif conn.type == prog.constant("FSNOTIFY_OBJ_TYPE_SB"): + # The "sb" object type was not present when 36f10f55ff1d2 ("fsnotify: + # let connector point to an abstract object") so it will never have an + # "sb" field. + return "sb", container_of( + conn.obj, "struct super_block", "s_fsnotify_marks" + ) + else: + return "unknown", NULL(prog, "void *") + + +def hlist_first_entry_or_null(type: str, head: Object, field: str): + # Return the first entry of an hlist, or NULL. Equivalent to the drgn + # list_first_entry_or_null function, just a useful helper. 
+ for obj in hlist_for_each_entry(type, head, field): + return obj + return NULL(head.prog_, type + " *") + + +def fsnotify_summarize_object(kind: str, obj: Object) -> str: + """ + Given an object marked by fsnotify, return a string representation + + This is typically a file path: either the path to the watched file/dir, or + the path to the mounted filesystem when a vfsmount or superblock. It should + be noted that in all cases, there can be multiple paths (e.g. hard linked + files, multiple mounts, etc). We output only one and hope it is useful. + + :param kind: either inode, vfsmount, sb, or unknown + :param obj: a corresponding drgn object (see :func:`fsnotify_mark_object()`) + :returns: a string representation for printing to a user + """ + if kind == "inode": + # Arbitrarily choose the first dentry for this inode, and further use + # the first mount point all the way up the tree. We just want something + # useful, not exhaustive. + # 946e51f2bf37f ("move d_rcu from overlapping d_child to overlapping d_alias") + field = ( + "d_alias" + if type_has_member(obj.prog_, "struct dentry", "d_alias") + else "d_u.d_alias" + ) + dentry = hlist_first_entry_or_null( + "struct dentry", obj.i_dentry.address_of_(), field + ) + if dentry: + return escape_ascii_string(dentry_path_any_mount(dentry)) + else: + return "(ANON INODE)" + elif kind == "vfsmount": + fstype = obj.mnt.mnt_sb.s_type.name.string_().decode() + path = escape_ascii_string(dentry_path_any_mount(obj.mnt_mountpoint)) + return f"FS:{fstype} MOUNT:{path}" + pass + elif kind == "sb": + fstype = obj.s_type.name.string_().decode() + first = sb_first_mount_point(obj) + path = escape_ascii_string(dentry_path_any_mount(first)) + return f"SUPER:{fstype} ({path})" + else: + return "(not implemented)" + + +def print_waitqueue( + wq: Object, indent: int = 2, stack_trace: bool = False +) -> None: + """ + Print the waiters of a waitqueue + + This function enumerates all entries of a wait queue, and prints out + information 
about each entry. Many entries are simply a task directly + waiting. However, wait queues may be waited on by select and epoll objects, + and probably other possibilities too. This function tries to print enough + information to know who is waiting on a waitqueue, even if there's a select + or epoll happening. Since epoll objects themselves could be waited upon, + it's possible that this function will recursively call itself. + + :param wq: the ``wait_queue_head_t`` object + :param indent: indentation for the output + :param stack_trace: whether to print stack trace for waiters + """ + if not waitqueue_active(wq): + print(" ") + return + prog = wq.prog_ + pfx = " " * indent + for entry in waitqueue_for_each_entry(wq): + func = "UNKNOWN" + try: + func = prog.symbol(entry.func.value_()).name + except LookupError: + pass + + if func == "pollwake": + wqueues = cast("struct poll_wqueues *", entry.private) + task = wqueues.polling_task + print( + f"{pfx}[PID: {task.pid.value_()} COMM: {task.comm.string_().decode()} WAIT: select]" + ) + if stack_trace: + bt(task, indent=indent + 2) + elif func == "ep_poll_callback": + epitem = container_of(entry, "struct eppoll_entry", "wait").base + ep = epitem.ep + print(f"{pfx}[EVENTPOLL: {ep.value_():x}]") + found_waiter = False + if waitqueue_active(ep.wq): + print(f"{pfx}Waiting in epoll_wait():") + print_waitqueue(ep.wq, indent + 2, stack_trace=stack_trace) + found_waiter = True + if waitqueue_active(ep.poll_wait): + print(f"{pfx}Waiting in file->poll():") + print_waitqueue( + ep.poll_wait, indent + 2, stack_trace=stack_trace + ) + found_waiter = True + if not found_waiter: + print(f"{pfx}No waiters found.") + else: + info = slab_object_info(entry.private) + if info and info.slab_cache.name.string_() == b"task_struct": + task = cast("struct task_struct *", entry.private) + print( + f"{pfx}[PID: {task.pid.value_()} COMM: {task.comm.string_().decode()} WAIT: direct]" + ) + if stack_trace: + bt(task, indent=indent + 2) + + +def 
fsnotify_group_report( + group: Object, group_kind: str, verbose: int = 1 +) -> None: + """ + Print a report about an fsnotify group. + :param group: ``struct fsnotify_group *`` + :param group_kind: either inotify or fanotify + :param verbose: a verbosity level: + 0: summarize only + 1: output vfsmounts and super blocks, and a limited number of inodes + 2: same as above, but also include stack traces for waiters + 3: output every marked inode (this could be a very large amount) + """ + print(f"FSNOTIFY GROUP: {group.value_():x}") + kind_counts: Dict[str, int] = {} + for mark in fsnotify_group_for_each_mark(group): + kind, ptr = fsnotify_mark_object(mark) + kind_counts[kind] = kind_counts.get(kind, 0) + 1 + mask = decode_flags( + mark.mask, FSNOTIFY_FLAGS.items(), bit_numbers=False + ) + # 8e17bf975102c ("fanotify: prepare for setting event flags in ignore + # mask") + try: + ignore_mask = decode_flags( + mark.ignore_mask, FSNOTIFY_FLAGS.items(), bit_numbers=False + ) + except AttributeError: + ignore_mask = decode_flags( + mark.ignored_mask, FSNOTIFY_FLAGS.items(), bit_numbers=False + ) + try: + count = mark.refcnt.refs.counter.value_() + except AttributeError: + # 7761daa6a1599 ("fsnotify: convert fsnotify_group.refcnt from + # atomic_t to refcount_t") + count = mark.refcnt.counter.value_() + summary = fsnotify_summarize_object(kind, ptr) + if verbose < 1: + continue + if verbose < 3 and kind == "inode": + if kind_counts[kind] == 10: + print( + " " + ) + if kind_counts[kind] >= 10: + continue + print(f" MARK: {kind} {ptr.value_():x} {summary}") + print(f" CNT:{count} MASK:{mask} IGN:{ignore_mask}") + print( + "OBJECT SUMMARY: " + + ", ".join(f"{kind}: {count}" for kind, count in kind_counts.items()) + ) + + pending_notifications = list_count_nodes( + group.notification_list.address_of_() + ) + print(f"{pending_notifications} notifications are pending.") + print("Tasks waiting for notification:") + print_waitqueue(group.notification_waitq, stack_trace=verbose >= 2) 
+ + if group_kind == "fanotify": + resp_cnt = list_count_nodes( + group.fanotify_data.access_list.address_of_() + ) + print(f"{resp_cnt} pending permission responses") + print("Tasks waiting for permission response from userspace:") + print_waitqueue( + group.fanotify_data.access_waitq, stack_trace=verbose >= 2 + ) + elif group_kind in ("inotify", "dnotify"): + pass # nothing special to report + else: + print(f"unknown kind {group_kind}") + + +def fsnotify_show(prog: Program, verbose: int = 1) -> None: + """ + Print a report of every fsnotify group on the system. + + This enumerates all fanotify and inotify groups, by iterating over each task + & finding relevant files. Each one has a report printed. Finally, the system + dnotify group (there is only one) is printed. + + :param verbose: verbosity level (see :func:`fsnotify_group_report()`) + """ + fanotify_ops = prog["fanotify_fops"].address_of_() + inotify_ops = prog["inotify_fops"].address_of_() + group_type = prog.type("struct fsnotify_group *") + seen_groups = set() + for task in for_each_task(prog): + # No point in looking at threads, since file descriptions are shared. + if not is_group_leader(task): + continue + + for fd, file in for_each_file(task): + if file and file.f_op == fanotify_ops: + kind = "fanotify" + elif file and file.f_op == inotify_ops: + kind = "inotify" + else: + continue + print( + f"[PID {task.pid.value_()} COMM: {task.comm.string_().decode()} {kind} FD {fd}]" + ) + group = cast(group_type, file.private_data) + + # Since file descriptors can be shared even across tasks, we need to + # track groups we've already reported and skip re-reporting. This + # reduces the output size and runtime. For example, crond seems to + # share an inotify FD across tasks. 
+ if group.value_() not in seen_groups: + seen_groups.add(group.value_()) + fsnotify_group_report(group, kind, verbose=verbose) + else: + print(f"FSNOTIFY GROUP {group.value_():x}: already seen") + print() + if prog["dnotify_group"]: + # dnotify_group can be NULL early in boot. No use crashing if that's the + # case. + print("[SYSTEM DNOTIFY GROUP]") + fsnotify_group_report( + prog["dnotify_group"], "dnotify", verbose=verbose + ) + + +class Fsnotify(CorelensModule): + """Print details about the fsnotify subsystem""" + + name = "fsnotify" + + def add_args(self, parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--verbose", + "-v", + type=int, + default=1, + help="Set verbosity: 0-4 (default 1)", + ) + + def run(self, prog: Program, args: argparse.Namespace) -> None: + fsnotify_show(prog, verbose=args.verbose) diff --git a/drgn_tools/util.py b/drgn_tools/util.py index 41299163..e58818ce 100644 --- a/drgn_tools/util.py +++ b/drgn_tools/util.py @@ -85,6 +85,20 @@ def kernel_version(prog: Program) -> t.Tuple[int, int, int]: return (int(maj), int(min), int(patch)) +def type_has_member(prog: Program, typ: str, name: str) -> bool: + """ + Return true if a given type has a member with the given name. + :param typ: type name to check + :param name: string member name to check + :returns: whether the type has a member by that name + """ + try: + prog.type(typ).member(name) + return True + except LookupError: + return False + + def has_member(obj: Object, name: str) -> bool: """ Return true if a given object has a member with the given name. diff --git a/tests/test_fsnotify.py b/tests/test_fsnotify.py new file mode 100644 index 00000000..fa512367 --- /dev/null +++ b/tests/test_fsnotify.py @@ -0,0 +1,7 @@ +# Copyright (c) 2024, Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from drgn_tools.fsnotify import fsnotify_show + + +def test_fsnotify(prog): + fsnotify_show(prog) From 77f9e51138b46f2eb4f791ab02ea604fb30782d2 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 21 Jun 2024 12:01:16 -0700 Subject: [PATCH 5/7] task: add for_each_task_in_group to iterate process threads Signed-off-by: Stephen Brennan --- drgn_tools/task.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drgn_tools/task.py b/drgn_tools/task.py index ce90f762..aa6ad630 100644 --- a/drgn_tools/task.py +++ b/drgn_tools/task.py @@ -450,6 +450,29 @@ def for_each_task_in_state(prog: drgn.Program, state: str) -> Iterable[Object]: yield task +def for_each_task_in_group( + task: Object, include_self: bool = False +) -> Iterable[Object]: + """ + Iterate over all tasks in the thread group + + Or, in the more common userspace terms, iterate over all threads of a + process. + + :param task: a task whose group to iterate over + :param include_self: should ``task`` itself be returned + :returns: an iterable of every thread in the thread group + """ + if include_self: + yield task + for gtask in list_for_each_entry( + "struct task_struct", + task.thread_group.address_of_(), + "thread_group", + ): + yield gtask + + def count_tasks_in_state(prog: drgn.Program, state: str) -> int: """ Count all tasks in a given state. From 5ce976df51b4b2a93cd34acfeea2e624453091e7 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 26 Jun 2024 13:58:08 -0700 Subject: [PATCH 6/7] fsnotify: add support for UEK4 Hopefully we will never need this, but it allows the UEK4 live and vmcore tests to pass. 
Signed-off-by: Stephen Brennan --- drgn_tools/fsnotify.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drgn_tools/fsnotify.py b/drgn_tools/fsnotify.py index 5cd424d9..e53642ea 100644 --- a/drgn_tools/fsnotify.py +++ b/drgn_tools/fsnotify.py @@ -72,6 +72,18 @@ def fsnotify_group_for_each_mark(group: Object) -> Iterator[Object]: ) +def _get_object_no_type(obj: Object) -> Tuple[str, Object]: + # obj may be: + # - struct fsnotify_mark_connector (if it exists) + # - struct fsnotify_mark (if this kernel version has no connector struct) + if obj.flags & 0x1: + return "inode", obj.inode + elif obj.flags & 0x2: + return "vfsmount", obj.vfsmount + else: + return "unknown", NULL(obj.prog_, "void *") + + def fsnotify_mark_object(mark: Object) -> Tuple[str, Object]: """ For an fsnotify mark, determine what kind of object and return it @@ -81,25 +93,31 @@ def fsnotify_mark_object(mark: Object) -> Tuple[str, Object]: pointer to the object. If we don't understand the object type, then we return ("unknown", NULL). - :param mark: ``struct fsnotify-mark *`` + :param mark: ``struct fsnotify_mark *`` :returns: (object type, object pointer) """ - conn = mark.connector prog = mark.prog_ - if not hasattr(conn, "type"): + try: + conn = mark.connector + except AttributeError: + # Commit 9dd813c15b2c1 ("fsnotify: Move mark list head from object into + # dedicated structure") is the beginning of a series that introduces the + # fsnotify_mark_connector. Prior to this, the mark directly pointed at + # the object it contained. This was merged in 4.12. + return _get_object_no_type(mark) + + try: + type_ = conn.type.read_() + except AttributeError: # Commit d6f7b98bc8147 ("fsnotify: use type id to identify connector # object type") adds a type field to the connector. Before this, type # was expressed as bits in the flag field. The bit numbers were # preprocessor definitions, let's just hardcode them here. 
- if conn.flags & 0x1: - return "inode", conn.inode - elif conn.flags & 0x2: - return "vfsmount", conn.vfsmount - else: - return "unknown", NULL(prog, "void *") + return _get_object_no_type(conn) + # See fsnotify_conn_{inode,mount,sb} in fs/notify/fsnotify.h - if conn.type == prog.constant("FSNOTIFY_OBJ_TYPE_INODE"): + if type_ == prog.constant("FSNOTIFY_OBJ_TYPE_INODE"): # Prior to 36f10f55ff1d2 ("fsnotify: let connector point to an abstract # object"), there were direct pointers in the connector. if hasattr(conn, "inode"): @@ -107,7 +125,7 @@ def fsnotify_mark_object(mark: Object) -> Tuple[str, Object]: return "inode", container_of( conn.obj, "struct inode", "i_fsnotify_marks" ) - elif conn.type == prog.constant("FSNOTIFY_OBJ_TYPE_VFSMOUNT"): + elif type_ == prog.constant("FSNOTIFY_OBJ_TYPE_VFSMOUNT"): # Prior to 36f10f55ff1d2 ("fsnotify: let connector point to an abstract # object"), there were direct pointers in the connector. if hasattr(conn, "vfsmount"): @@ -115,7 +133,7 @@ def fsnotify_mark_object(mark: Object) -> Tuple[str, Object]: return "vfsmount", container_of( conn.obj, "struct mount", "mnt_fsnotify_marks" ) - elif conn.type == prog.constant("FSNOTIFY_OBJ_TYPE_SB"): + elif type_ == prog.constant("FSNOTIFY_OBJ_TYPE_SB"): # The "sb" object type was not present when 36f10f55ff1d2 ("fsnotify: # let connector point to an abstract object") so it will never have an # "sb" field. From 07e7c769939daf4319372808fa1e2a44b22e0e60 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 26 Jun 2024 14:16:27 -0700 Subject: [PATCH 7/7] bt: add the correct idle function for aarch64 Also include the native_safe_halt() when a swapper task is halted. 
Signed-off-by: Stephen Brennan --- drgn_tools/bt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drgn_tools/bt.py b/drgn_tools/bt.py index e232e0d7..518a7755 100644 --- a/drgn_tools/bt.py +++ b/drgn_tools/bt.py @@ -583,11 +583,11 @@ def print_online_bt( if skip_idle and task.comm.string_().decode() == f"swapper/{cpu}": # Just because it's the swapper task, does not mean it is idling. # Check the symbol at the top of the stack to ensure it's the - # architecture idle function. + # architecture idle function, or if the swapper task got halted. trace = prog.stack_trace(task) try: sym = trace[0].symbol().name - if sym in ("intel_idle",): + if sym in ("intel_idle", "arch_cpu_idle", "native_safe_halt"): continue except (IndexError, LookupError): pass