Skip to content

Commit

Permalink
feat(events): add io_uring visibility
Browse files Browse the repository at this point in the history
add events to allow visibility into io_uring operations in the system.
the added events are:
io_uring_create - creation of io_uring queue
io_uring_submit_req - submission of io_uring request
io_write - a write operation performed via io_uring
  • Loading branch information
roikol committed Oct 23, 2023
1 parent 1ecedcc commit 5f68284
Show file tree
Hide file tree
Showing 13 changed files with 512 additions and 10 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ go 1.19
require (
github.com/IBM/fluent-forward-go v0.2.1
github.com/Masterminds/sprig/v3 v3.2.3
github.com/aquasecurity/libbpfgo v0.5.0-libbpf-1.2
github.com/aquasecurity/libbpfgo/helpers v0.4.6-0.20230321190037-f591a2c5734f
github.com/aquasecurity/libbpfgo v0.5.1-libbpf-1.2.0.20230928114152-cf2d0bea778b
github.com/aquasecurity/libbpfgo/helpers v0.4.6-0.20230928114152-cf2d0bea778b
github.com/aquasecurity/tracee/api v0.0.0-20231013014739-b32a168ee6a8
github.com/aquasecurity/tracee/types v0.0.0-20231013014739-b32a168ee6a8
github.com/containerd/containerd v1.7.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVb
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230321174746-8dcc6526cfb1 h1:X8MJ0fnN5FPdcGF5Ij2/OW+HgiJrRg3AfHAx1PJtIzM=
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230321174746-8dcc6526cfb1/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM=
github.com/aquasecurity/libbpfgo v0.5.0-libbpf-1.2 h1:Yywi9wC3GPDOgR8wr6P9geY2qvFqKxH5sctMOssw+MQ=
github.com/aquasecurity/libbpfgo v0.5.0-libbpf-1.2/go.mod h1:0rEApF1YBHGuZ4C8OYI9q5oDBVpgqtRqYATePl9mCDk=
github.com/aquasecurity/libbpfgo/helpers v0.4.6-0.20230321190037-f591a2c5734f h1:l127H3NqJBmw+XMt+haBOeZIrBppuw7TJz26cWMI9kY=
github.com/aquasecurity/libbpfgo/helpers v0.4.6-0.20230321190037-f591a2c5734f/go.mod h1:j/TQLmsZpOIdF3CnJODzYngG4yu1YoDCoRMELxkQSSA=
github.com/aquasecurity/libbpfgo v0.5.1-libbpf-1.2.0.20230928114152-cf2d0bea778b h1:waJ52oNyHnFIOwTKbw8EMZ7ZsvZ7rPFqFcMMyi9EWqA=
github.com/aquasecurity/libbpfgo v0.5.1-libbpf-1.2.0.20230928114152-cf2d0bea778b/go.mod h1:0rEApF1YBHGuZ4C8OYI9q5oDBVpgqtRqYATePl9mCDk=
github.com/aquasecurity/libbpfgo/helpers v0.4.6-0.20230928114152-cf2d0bea778b h1:IWC4AIIhcZJhkRRJiWg9G8cx2/8ntePqmsnEbq7N1XY=
github.com/aquasecurity/libbpfgo/helpers v0.4.6-0.20230928114152-cf2d0bea778b/go.mod h1:9gxdXex9MiHcJmPEybiO3JSHnNyWe7X8FtNtHQ4Evns=
github.com/aquasecurity/tracee/api v0.0.0-20231006160439-f3bc7d1e9299 h1:pswolShOclg4Jh7FX0WImcfFpSToDo2U9rFrdmpQ74Y=
github.com/aquasecurity/tracee/api v0.0.0-20231006160439-f3bc7d1e9299/go.mod h1:wH09uZ34SUP/3QAprCnqA0pH3hNwZoroCW9QSXCF2eY=
github.com/aquasecurity/tracee/api v0.0.0-20231013014739-b32a168ee6a8 h1:NGzPDvQofEG04CoPZjSSRoFMxnSd3Brh39BY1dmdyZM=
Expand Down
3 changes: 3 additions & 0 deletions pkg/ebpf/c/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ enum tail_call_id_e
TAIL_HIDDEN_KERNEL_MODULE_KSET,
TAIL_HIDDEN_KERNEL_MODULE_MOD_TREE,
TAIL_HIDDEN_KERNEL_MODULE_NEW_MOD_ONLY,
TAIL_IO_WRITE,
MAX_TAIL_CALL
};

Expand Down Expand Up @@ -118,6 +119,8 @@ BPF_PERCPU_ARRAY(scratch_map, scratch_t, 1); // scratch sp
BPF_LRU_HASH(file_modification_map, file_mod_key_t, int, 10240); // hold file data to decide if should submit file modification event
BPF_LRU_HASH(io_file_path_cache_map, file_id_t, path_buf_t, 5); // store cache for IO operations path
BPF_LRU_HASH(elf_files_map, file_id_t, bool, 64); // store cache for file ELF type check
BPF_LRU_HASH(uring_poll_ctx_map, u32, event_context_t, 1024); // store real context of io_uring polling operations
BPF_LRU_HASH(uring_worker_ctx_map, u64, event_context_t, 1024); // store real context for io_uring worker operations

// clang-format on

Expand Down
319 changes: 315 additions & 4 deletions pkg/ebpf/c/tracee.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2845,7 +2845,7 @@ submit_magic_write(program_data_t *p, file_info_t *file_info, io_data_t io_data,
// Return true when event_id is one of the file-I/O events (vfs read/write
// variants, kernel_write, or io_uring io_write) and the event is chosen for
// submission in the current program context.
statfunc bool should_submit_io_event(u32 event_id, program_data_t *p)
{
    switch (event_id) {
        case VFS_READ:
        case VFS_READV:
        case VFS_WRITE:
        case VFS_WRITEV:
        case __KERNEL_WRITE:
        case IO_WRITE:
            return should_submit(event_id, p->event);
        default:
            return false;
    }
}

Expand Down Expand Up @@ -3116,6 +3116,127 @@ int BPF_KPROBE(trace_ret_kernel_write_tail)
return capture_file_write(ctx, __KERNEL_WRITE, true);
}

SEC("kprobe/io_write")
// Entry probe: saves io_write()'s arguments (args[0] is the struct io_kiocb *
// request) under the IO_WRITE id, to be consumed by the kretprobe below.
TRACE_ENT_FUNC(io_write, IO_WRITE);

// Return probe on io_write(): reports writes performed through io_uring,
// which bypass the regular vfs_write/vfs_writev probes. Extracts the buffer,
// length, position and target file from the io_kiocb request, using CO-RE
// struct flavors to handle the io_kiocb layout changes across kernel versions.
SEC("kretprobe/io_write")
int BPF_KPROBE(trace_ret_io_write)
{
    args_t saved_args;
    if (load_args(&saved_args, IO_WRITE) != 0) {
        // missed entry or not traced
        return 0;
    }

    program_data_t p = {};
    if (!init_program_data(&p, ctx)) {
        del_args(IO_WRITE);
        return 0;
    }

    bool should_submit = should_submit_io_event(IO_WRITE, &p);
    if (!should_submit) {
        // NOTE(review): the tail call (content capture) is taken only when the
        // IO_WRITE event is NOT submitted — the opposite of the
        // do_file_io_operation() pattern (submit, then tail-call to capture).
        // Also del_args() below runs only if the tail call fails (a successful
        // bpf_tail_call does not return). Looks work-in-progress (see the
        // commented-out do_file_io_operation call at the end) — confirm intent.
        bpf_tail_call(ctx, &prog_array, TAIL_IO_WRITE);
        del_args(IO_WRITE);
        return 0;
    }

    // don't proceed because the write operation wasn't successful
    int ret = PT_REGS_RC(ctx);
    if (ret < 0) {
        del_args(IO_WRITE);
        return 0;
    }

    struct io_kiocb *req = (struct io_kiocb *) saved_args.args[0];

    // If this write ran on an io-wq worker thread, replace the event context
    // with the real (submitting) task's context, stored by the
    // io_uring_queue_async_work tracepoint. The map key is the req pointer
    // value itself (key type u64), hence &req is passed.
    u32 host_tid = p.task_info->context.host_tid;
    event_context_t *real_ctx = bpf_map_lookup_elem(&uring_worker_ctx_map, &req);
    if (real_ctx != NULL) {
        p.event->context = *real_ctx;
        bpf_map_delete_elem(&uring_worker_ctx_map, &req);
    }

    // get write info (buffer address and length) from req,
    // selecting the right io_kiocb flavor for the running kernel
    struct io_rw *rw = NULL;
    struct kiocb kiocb;
    u64 addr;
    void *buf;
    u32 len;
    if (bpf_core_field_exists(req->cmd)) { // Version >= v6
        struct io_cmd_data io_cmd = BPF_CORE_READ(req, cmd);
        rw = (struct io_rw *) &io_cmd;
        kiocb = BPF_CORE_READ(rw, kiocb);

        addr = BPF_CORE_READ(rw, addr);
        buf = (void *) addr;
        len = BPF_CORE_READ(rw, len);
    } else {
        struct io_kiocb___older_v6 *req_55 = (void *) req;
        if (bpf_core_field_exists(req_55->connect)) { // Version >= v5.5
            rw = &req_55->rw;
            kiocb = BPF_CORE_READ(rw, kiocb);

            addr = BPF_CORE_READ(rw, addr);
            buf = (void *) addr;
            len = BPF_CORE_READ(rw, len);
        } else { // Version >= v5.1
            struct io_kiocb___older_v55 *req_51 = (void *) req_55;
            kiocb = BPF_CORE_READ(req_51, rw);
            struct sqe_submit submit = BPF_CORE_READ(req_51, submit);
            const struct io_uring_sqe *sqe = submit.sqe;

            addr = BPF_CORE_READ(sqe, addr);
            buf = (void *) addr;
            len = BPF_CORE_READ(sqe, len);
        }
    }

    // get write position: pos = ki_pos - bytes_done
    // (assumes ki_pos was already advanced by the completed write — TODO confirm)
    // (reusing io_kiocb struct flavors to get the correct data for the current kernel version)
    loff_t ki_pos = kiocb.ki_pos;
    u32 bytes_done = 0;
    if (bpf_core_field_exists(req->cqe)) { // Version >= v5.19
        struct io_cqe cqe = BPF_CORE_READ(req, cqe);
        bytes_done = cqe.res;
    } else { // Version >= v5.10
        struct io_kiocb___older_v6 *req_55 = (void *) req;
        if (bpf_core_field_exists(req_55->result)) { // Version >= v5.3
            bytes_done = BPF_CORE_READ(req_55, result);
        } else { // Version >= v5.1
            bytes_done = BPF_CORE_READ(req_55, error);
        }
    }
    loff_t pos = ki_pos - bytes_done;

    // get file info
    struct file *file = kiocb.ki_filp;
    file_info_t file_info = get_file_info(file);

    save_str_to_buf(&p.event->args_buf, file_info.pathname_p, 0);
    save_to_submit_buf(&p.event->args_buf, &pos, sizeof(loff_t), 1);
    save_to_submit_buf(&p.event->args_buf, &buf, sizeof(void *), 2);
    save_to_submit_buf(&p.event->args_buf, &len, sizeof(u32), 3);
    // tid of the thread that executed the write (captured before the event
    // context was possibly swapped to the submitter's context above)
    save_to_submit_buf(&p.event->args_buf, &host_tid, sizeof(u32), 4);
    save_to_submit_buf(&p.event->args_buf, &file_info.id.device, sizeof(dev_t), 5);
    save_to_submit_buf(&p.event->args_buf, &file_info.id.inode, sizeof(unsigned long), 6);

    events_perf_submit(&p, IO_WRITE, ret);

    // TODO: don't del if passing to send_bin
    del_args(IO_WRITE);
    // return do_file_io_operation(ctx, IO_WRITE, TAIL_IO_WRITE, false, false);

    return 0;
}

// Tail-called program for IO_WRITE: capture the written file content
// (no file write-out of the args buffer; delete_args is false).
SEC("kretprobe/io_write_tail")
int BPF_KPROBE(trace_ret_io_write_tail)
{
    int rc = capture_file_write(ctx, IO_WRITE, false);
    return rc;
}

SEC("kprobe/vfs_read")
TRACE_ENT_FUNC(vfs_read, VFS_READ);

Expand Down Expand Up @@ -4023,9 +4144,9 @@ int BPF_KPROBE(trace_ret_do_splice)
// modified (the PIPE_BUF_CAN_MERGE flag is on in the pipe_buffer struct).
struct pipe_buffer *last_write_page_buffer = get_last_write_pipe_buffer(out_pipe);
unsigned int out_pipe_last_buffer_flags = BPF_CORE_READ(last_write_page_buffer, flags);
if ((out_pipe_last_buffer_flags & PIPE_BUF_FLAG_CAN_MERGE) == 0) {
return 0;
}
// if ((out_pipe_last_buffer_flags & PIPE_BUF_FLAG_CAN_MERGE) == 0) {
// return 0;
// }

struct file *in_file = (struct file *) saved_args.args[0];
struct inode *in_inode = BPF_CORE_READ(in_file, f_inode);
Expand Down Expand Up @@ -4866,6 +4987,196 @@ int BPF_KPROBE(trace_ret_exec_binprm2)
return events_perf_submit(&p, PROCESS_EXECUTION_FAILED, ret);
}

// io_uring_create raw tracepoint: reports creation of an io_uring ring
// (fd, ring ctx pointer, SQ/CQ entry counts, setup flags) and whether SQPOLL
// kernel-thread polling is enabled for it. When SQPOLL is used, the creator's
// event context is stored in uring_poll_ctx_map keyed by the polling thread's
// tid, so later submissions made by that kernel thread can be attributed to
// the real task.
SEC("raw_tracepoint/io_uring_create")
int tracepoint__io_uring__io_uring_create(struct bpf_raw_tracepoint_args *ctx)
{
    program_data_t p = {};
    if (!init_program_data(&p, ctx))
        return 0;

    if (!should_trace(&p))
        return 0;

    if (!should_submit(IO_URING_CREATE, p.event))
        return 0;

    int fd = ctx->args[0];
    struct io_ring_ctx *io_uring_ctx = (struct io_ring_ctx *) ctx->args[1];
    u32 sq_entries = ctx->args[2];
    u32 cq_entries = ctx->args[3];
    u32 flags = ctx->args[4];

    // getting the task_struct of the kernel thread if polling is used on this ring.
    bool polling = false;
    struct io_sq_data *sq_data = BPF_CORE_READ(io_uring_ctx, sq_data);
    if (sq_data != NULL) {
        // update uring_poll_ctx_map with real task info
        struct task_struct *thread = BPF_CORE_READ(sq_data, thread);
        u32 host_tid = BPF_CORE_READ(thread, pid);
        // BUGFIX: if the SQPOLL thread is missing/unstarted, the CO-RE read
        // yields tid 0 — skip the update instead of poisoning the map entry
        // for key 0 with this task's context.
        if (host_tid != 0)
            bpf_map_update_elem(&uring_poll_ctx_map, &host_tid, &p.event->context, BPF_ANY);

        polling = true;
    }

    save_to_submit_buf(&p.event->args_buf, &fd, sizeof(int), 0);
    save_to_submit_buf(&p.event->args_buf, &io_uring_ctx, sizeof(struct io_ring_ctx *), 1);
    save_to_submit_buf(&p.event->args_buf, &sq_entries, sizeof(u32), 2);
    save_to_submit_buf(&p.event->args_buf, &cq_entries, sizeof(u32), 3);
    save_to_submit_buf(&p.event->args_buf, &flags, sizeof(u32), 4);
    save_to_submit_buf(&p.event->args_buf, &polling, sizeof(bool), 5);

    return events_perf_submit(&p, IO_URING_CREATE, 0);
}

// Serialize and submit an IO_URING_SUBMIT_REQ event.
// Shared by the io_uring_submit_sqe and io_uring_submit_req tracepoint
// programs, which extract the same fields from differently-shaped tracepoint
// arguments on different kernel versions.
//   opcode:    the request's io_uring opcode
//   user_data: user_data value supplied on the SQE
//   flags:     request flags
//   sq_thread: request was issued by an SQPOLL kernel thread
//   host_tid:  tid of the thread that actually performed the submission
//   file_info: target file of the request (path, device, inode)
// Returns the result of events_perf_submit().
statfunc int common_submit_io_uring_submit_req(program_data_t *p,
                                               u8 opcode,
                                               u64 *user_data,
                                               u32 *flags,
                                               bool sq_thread,
                                               u32 *host_tid,
                                               file_info_t *file_info)
{
    save_str_to_buf(&p->event->args_buf, file_info->pathname_p, 0);
    save_to_submit_buf(&p->event->args_buf, &file_info->id.device, sizeof(dev_t), 1);
    save_to_submit_buf(&p->event->args_buf, &file_info->id.inode, sizeof(unsigned long), 2);
    save_to_submit_buf(&p->event->args_buf, &opcode, sizeof(u8), 3);
    save_to_submit_buf(&p->event->args_buf, user_data, sizeof(u64), 4);
    save_to_submit_buf(&p->event->args_buf, flags, sizeof(u32), 5);
    save_to_submit_buf(&p->event->args_buf, &sq_thread, sizeof(bool), 6);
    save_to_submit_buf(&p->event->args_buf, host_tid, sizeof(u32), 7);

    return events_perf_submit(p, IO_URING_SUBMIT_REQ, 0);
}

// io_uring_submit_sqe raw tracepoint: reports submission of an io_uring
// request. The tracepoint's argument layout changed in kernel v6 (and the
// tracepoint itself was later replaced by io_uring_submit_req), so argument
// extraction is version-dependent.
SEC("raw_tracepoint/io_uring_submit_sqe")
int tracepoint__io_uring__io_uring_submit_sqe(struct bpf_raw_tracepoint_args *ctx)
{
    program_data_t p = {};
    if (!init_program_data(&p, ctx))
        return 0;

    if (!should_trace(&p))
        return 0;

    if (!should_submit(IO_URING_SUBMIT_REQ, p.event))
        return 0;

    // get tracepoint arguments -
    // this tracepoint was changed in kernel v6
    struct io_kiocb *req;
    u8 opcode;
    u64 user_data;
    u32 flags;
    bool sq_thread;
    // BUGFIX: use the type-based CO-RE existence check on a null pointer
    // (never dereferenced at runtime) rather than on the still-uninitialized
    // local `req`, matching tracepoint__io_uring__io_uring_submit_req.
    if (!bpf_core_field_exists(((struct io_kiocb *) 0)->cmd)) { // Version < v6
        req = (struct io_kiocb *) ctx->args[1];
        opcode = ctx->args[2];
        user_data = ctx->args[3];
        flags = ctx->args[4];
        sq_thread = ctx->args[6];
    } else { // Version >= v6
        req = (struct io_kiocb *) ctx->args[0];
        opcode = BPF_CORE_READ(req, opcode);
        struct io_cqe cqe = BPF_CORE_READ(req, cqe);
        user_data = cqe.user_data;
        flags = BPF_CORE_READ(req, flags);
        struct io_ring_ctx *uring_ctx = BPF_CORE_READ(req, ctx);
        u32 ctx_flags = BPF_CORE_READ(uring_ctx, flags);
        sq_thread = ctx_flags & IORING_SETUP_SQPOLL;
    }

    // get file info
    struct file *file = BPF_CORE_READ(req, file);
    file_info_t file_info = get_file_info(file);

    // get real task info: if submitted by an SQPOLL kernel thread, swap in the
    // creator's context recorded in uring_poll_ctx_map (keyed by this tid)
    u32 host_tid = p.task_info->context.host_tid;
    if (sq_thread) {
        event_context_t *real_ctx = bpf_map_lookup_elem(&uring_poll_ctx_map, &host_tid);
        if (real_ctx != NULL) {
            p.event->context = *real_ctx;
        }
    }

    // submit event
    return common_submit_io_uring_submit_req(
        &p, opcode, &user_data, &flags, sq_thread, &host_tid, &file_info);
}

// io_uring_submit_req raw tracepoint: reports submission of an io_uring
// request on kernels where this tracepoint exists (>= v6.4, replacing
// io_uring_submit_sqe).
SEC("raw_tracepoint/io_uring_submit_req")
int tracepoint__io_uring__io_uring_submit_req(struct bpf_raw_tracepoint_args *ctx)
{
    program_data_t p = {};
    if (!init_program_data(&p, ctx))
        return 0;

    if (!should_trace(&p))
        return 0;

    if (!should_submit(IO_URING_SUBMIT_REQ, p.event))
        return 0;

    // this tracepoint only exists from kernel >= v6.4.
    // this check is to satisfy the verifier.
    if (!bpf_core_field_exists(((struct io_kiocb *) 0)->cmd)) // Version < v6
        return 0;

    // extract the request details (kernel >= v6 io_kiocb layout)
    struct io_kiocb *req = (struct io_kiocb *) ctx->args[0];
    u8 opcode = BPF_CORE_READ(req, opcode);
    struct io_cqe cqe = BPF_CORE_READ(req, cqe);
    u64 user_data = cqe.user_data;
    u32 req_flags = BPF_CORE_READ(req, flags);
    struct io_ring_ctx *ring_ctx = BPF_CORE_READ(req, ctx);
    u32 ring_flags = BPF_CORE_READ(ring_ctx, flags);
    bool sq_thread = ring_flags & IORING_SETUP_SQPOLL;

    // resolve the target file of the request
    struct file *file = BPF_CORE_READ(req, file);
    file_info_t file_info = get_file_info(file);

    // if issued by an SQPOLL kernel thread, swap in the real (creator) task
    // context stored in uring_poll_ctx_map under this tid
    u32 host_tid = p.task_info->context.host_tid;
    if (sq_thread) {
        event_context_t *real_ctx = bpf_map_lookup_elem(&uring_poll_ctx_map, &host_tid);
        if (real_ctx != NULL)
            p.event->context = *real_ctx;
    }

    // submit event
    return common_submit_io_uring_submit_req(
        &p, opcode, &user_data, &req_flags, sq_thread, &host_tid, &file_info);
}

// io_uring_queue_async_work raw tracepoint: a request is being handed off to
// an async (io-wq) worker. Record the submitting task's event context keyed by
// the request pointer, so worker-side probes (e.g. trace_ret_io_write) can
// attribute the operation to the real task.
SEC("raw_tracepoint/io_uring_queue_async_work")
int tracepoint__io_uring__io_uring_queue_async_work(struct bpf_raw_tracepoint_args *ctx)
{
    program_data_t p = {};
    if (!init_program_data(&p, ctx))
        return 0;

    if (!should_trace(&p))
        return 0;

    struct io_kiocb *req = (struct io_kiocb *) ctx->args[2];

    // if this was queued by an SQPOLL thread, first swap in the creator's
    // context from uring_poll_ctx_map (keyed by the current tid)
    u32 host_tid = p.task_info->context.host_tid;
    event_context_t *real_ctx = bpf_map_lookup_elem(&uring_poll_ctx_map, &host_tid);
    if (real_ctx != NULL)
        p.event->context = *real_ctx;

    // store the (possibly corrected) context keyed by the req pointer value
    // (the map's key type is u64, so &req passes the 8-byte pointer as key)
    bpf_map_update_elem(&uring_worker_ctx_map, &req, &p.event->context, BPF_ANY);

    return 0;
}

// clang-format off

// Network Packets (works from ~5.2 and beyond)
Expand Down
3 changes: 3 additions & 0 deletions pkg/ebpf/c/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ enum event_id_e
HIDDEN_KERNEL_MODULE_SEEKER,
MODULE_LOAD,
MODULE_FREE,
IO_URING_CREATE,
IO_URING_SUBMIT_REQ,
IO_WRITE,
MAX_EVENT_ID,
};

Expand Down
Loading

0 comments on commit 5f68284

Please sign in to comment.