From fb958c1e9a5b103705e649b6c8a64049bc651106 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Fri, 14 Jun 2024 17:07:13 +0200 Subject: [PATCH 1/9] feat: Include list of image layer directories in `ContainerInfo` For each container, try to retrieve information about image layers paths (from the host filesystem's perspective) and include them in `ContainerInfo`. --- Cargo.lock | 2 + crates/bpf-common/Cargo.toml | 2 + crates/bpf-common/src/containers/mod.rs | 77 ++++++- crates/pulsar-core/src/pdk/process_tracker.rs | 198 ++++++++++-------- 4 files changed, 188 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c5c7ac4..1c34688b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,6 +344,8 @@ dependencies = [ "diesel", "glob", "hex", + "hyper", + "hyperlocal", "lazy_static", "libc", "libsqlite3-sys", diff --git a/crates/bpf-common/Cargo.toml b/crates/bpf-common/Cargo.toml index 493ff29a..57fe2d95 100644 --- a/crates/bpf-common/Cargo.toml +++ b/crates/bpf-common/Cargo.toml @@ -28,6 +28,8 @@ procfs = { workspace = true } libc = { workspace = true } glob = { workspace = true } hex = { workspace = true } +hyper = { workspace = true } +hyperlocal = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } lazy_static = { workspace = true } diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs index 4b9ffcea..7699fef8 100644 --- a/crates/bpf-common/src/containers/mod.rs +++ b/crates/bpf-common/src/containers/mod.rs @@ -7,9 +7,12 @@ use std::{ os::unix::ffi::OsStringExt, path::{Path, PathBuf}, ptr, + str::FromStr, }; use diesel::{connection::SimpleConnection, prelude::*}; +use hyper::{body, Client}; +use hyperlocal::{UnixClientExt, Uri as HyperlocalUri}; use ini::Ini; use nix::unistd::Uid; use serde::{Deserialize, Serialize}; @@ -138,6 +141,32 @@ struct LibpodDBContainerConfig { json: String, } +/// Docker API response for `image inspect` request. 
+#[derive(Debug, Deserialize)] +struct ImageInspect { + #[serde(rename = "GraphDriver")] + graph_driver: GraphDriver, +} + +/// Data associated with Docker graphdriver. +#[derive(Debug, Deserialize)] +struct GraphDriver { + #[serde(rename = "Data")] + data: GraphDriverData, +} + +#[derive(Debug, Deserialize)] +struct GraphDriverData { + #[serde(rename = "LowerDir")] + lower_dir: Option, + #[serde(rename = "MergedDir")] + merged_dir: Option, + #[serde(rename = "UpperDir")] + upper_dir: Option, + #[serde(rename = "WorkDir")] + work_dir: Option, +} + /// Container information used in Pulsar alerts and rules. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Validatron)] pub struct ContainerInfo { @@ -145,6 +174,8 @@ pub struct ContainerInfo { pub name: String, pub image: String, pub image_digest: String, + #[validatron(skip)] + pub layers: Vec, } impl fmt::Display for ContainerInfo { @@ -158,19 +189,19 @@ impl fmt::Display for ContainerInfo { } impl ContainerInfo { - pub fn from_container_id( + pub async fn from_container_id( container_id: ContainerId, uid: Uid, ) -> Result, ContainerError> { let info = match container_id { - ContainerId::Docker(id) => Self::from_docker_id(id), + ContainerId::Docker(id) => Self::from_docker_id(id).await, ContainerId::Libpod(id) => Self::from_libpod_id(id, uid), }; info.map(Some) } - fn from_docker_id(id: String) -> Result { + async fn from_docker_id(id: String) -> Result { const DOCKER_CONTAINERS_PATH: &str = "/var/lib/docker/containers"; let path = PathBuf::from(DOCKER_CONTAINERS_PATH) @@ -194,11 +225,49 @@ impl ContainerInfo { let image = config.config.image; let image_digest = config.image_digest; + // `image_digest` has format like: + // + // ``` + // sha256:1d34ffeaf190be23d3de5a8de0a436676b758f48f835c3a2d4768b798c15a7f1 + // ``` + // + // The unprefixed digest is used as an image ID. 
+ let image_id = image_digest.split(':').last().unwrap(); + + let client = Client::unix(); + let url = HyperlocalUri::new( + "/var/run/docker.sock", + &format!("/images/{}/json", image_id), + ); + + let response = client.get(url.into()).await.unwrap(); + let body_bytes = body::to_bytes(response).await.unwrap(); + + let response: ImageInspect = serde_json::from_slice(&body_bytes).unwrap(); + + // Gather all filesystem layer paths. + let mut layers = Vec::new(); + if let Some(lower_dirs) = response.graph_driver.data.lower_dir { + for lower_dir in lower_dirs.split(':') { + layers.push(PathBuf::from_str(lower_dir).unwrap()); + } + } + if let Some(merged_dir) = response.graph_driver.data.merged_dir { + layers.push(merged_dir); + } + if let Some(upper_dir) = response.graph_driver.data.upper_dir { + layers.push(upper_dir); + } + if let Some(work_dir) = response.graph_driver.data.work_dir { + layers.push(work_dir); + } + Ok(Self { id, name, image, image_digest, + layers, }) } @@ -266,6 +335,8 @@ impl ContainerInfo { name: config.name, image: config.rootfs_image_name, image_digest: image.digest.clone(), + // TODO(vadorovsky): Parse layer information in Podman. + layers: Vec::new(), }) } } diff --git a/crates/pulsar-core/src/pdk/process_tracker.rs b/crates/pulsar-core/src/pdk/process_tracker.rs index cb928468..26163f0f 100644 --- a/crates/pulsar-core/src/pdk/process_tracker.rs +++ b/crates/pulsar-core/src/pdk/process_tracker.rs @@ -1,4 +1,8 @@ -use std::collections::{BTreeMap, HashMap}; +use std::{ + collections::{BTreeMap, HashMap}, + future::Future, + pin::Pin, +}; use bpf_common::{ containers::{ContainerId, ContainerInfo}, @@ -209,7 +213,7 @@ impl ProcessTracker { tokio::select! { msg = self.rx.recv() => match msg { Some(msg) => { - self.handle_message(msg); + self.handle_message(msg).await; self.cleanup(); // We check pending requests here and not periodically because // the only way we can get a response is by handling a message. 
@@ -224,9 +228,11 @@ impl ProcessTracker { } } - fn handle_message(&mut self, req: TrackerRequest) { + async fn handle_message(&mut self, req: TrackerRequest) { match req { - TrackerRequest::UpdateProcess(update) => self.handle_update(update), + TrackerRequest::UpdateProcess(update) => { + self.handle_update(update).await; + } TrackerRequest::GetProcessInfo(info_request) => { let r = self.get_info(info_request.pid, info_request.ts); match r { @@ -263,100 +269,116 @@ impl ProcessTracker { } } - fn handle_update(&mut self, mut update: TrackerUpdate) { - match update { - TrackerUpdate::Fork { - pid, - uid, - gid, - timestamp, - ppid, - namespaces, - container_id, - } => { - let container = - container_id.and_then(|c_id| { - match ContainerInfo::from_container_id(c_id, uid) { - Ok(container) => container, - Err(err) => { - log::error!("{err}"); - None + fn handle_update<'a>( + &'a mut self, + mut update: TrackerUpdate, + ) -> Pin + Send + 'a>> { + Box::pin(async move { + match update { + TrackerUpdate::Fork { + pid, + uid, + gid, + timestamp, + ppid, + namespaces, + container_id, + } => { + let container = match container_id { + Some(container_id) => { + match ContainerInfo::from_container_id(container_id.clone(), uid).await + { + Ok(container) => container, + Err(err) => { + log::error!("{err}"); + None + } } } - }); + None => None, + }; - self.processes.insert( - pid, - ProcessData { - ppid, - uid, - gid, - fork_time: timestamp, - exit_time: None, - original_image: self.get_image(ppid, timestamp), - exec_changes: BTreeMap::new(), - argv: self - .processes - .get(&ppid) - .map(|parent| parent.argv.clone()) - .unwrap_or_default(), - namespaces, - container, - }, - ); - if let Some(pending_updates) = self.pending_updates.remove(&pid) { - pending_updates - .into_iter() - .for_each(|update| self.handle_update(update)); + self.processes.insert( + pid, + ProcessData { + ppid, + uid, + gid, + fork_time: timestamp, + exit_time: None, + original_image: self.get_image(ppid, 
timestamp), + exec_changes: BTreeMap::new(), + argv: self + .processes + .get(&ppid) + .map(|parent| parent.argv.clone()) + .unwrap_or_default(), + namespaces, + container, + }, + ); + if let Some(pending_updates) = self.pending_updates.remove(&pid) { + for update in pending_updates { + self.handle_update(update).await; + } + } } - } - TrackerUpdate::Exec { - pid, - uid, - timestamp, - ref mut image, - ref mut argv, - namespaces, - ref container_id, - } => { - let container = container_id.clone().and_then(|c_id| { - match ContainerInfo::from_container_id(c_id, uid) { - Ok(container) => container, - Err(err) => { - log::error!("{err}"); - None + TrackerUpdate::Exec { + pid, + uid, + timestamp, + ref mut image, + ref mut argv, + namespaces, + ref container_id, + } => { + let container = match container_id { + Some(container_id) => { + match ContainerInfo::from_container_id(container_id.clone(), uid).await + { + Ok(container) => container, + Err(err) => { + log::error!("{err}"); + None + } + } } + None => None, + }; + + if let Some(p) = self.processes.get_mut(&pid) { + p.exec_changes.insert(timestamp, std::mem::take(image)); + p.argv = std::mem::take(argv); + p.namespaces = namespaces; + p.container = container; + } else { + // if exec arrived before the fork, we save the event as pending + log::debug!( + "(exec) Process {pid} not found in process tree, saving for later" + ); + self.pending_updates.entry(pid).or_default().push(update); } - }); - - if let Some(p) = self.processes.get_mut(&pid) { - p.exec_changes.insert(timestamp, std::mem::take(image)); - p.argv = std::mem::take(argv); - p.namespaces = namespaces; - p.container = container; - } else { - // if exec arrived before the fork, we save the event as pending - log::debug!("(exec) Process {pid} not found in process tree, saving for later"); - self.pending_updates.entry(pid).or_default().push(update); } - } - TrackerUpdate::Exit { pid, timestamp } => { - if let Some(p) = self.processes.get_mut(&pid) { - 
p.exit_time = Some(timestamp); - } else { - // if exit arrived before the fork, we save the event as pending - log::debug!("(exit) Process {pid} not found in process tree, saving for later"); - self.pending_updates.entry(pid).or_default().push(update); + TrackerUpdate::Exit { pid, timestamp } => { + if let Some(p) = self.processes.get_mut(&pid) { + p.exit_time = Some(timestamp); + } else { + // if exit arrived before the fork, we save the event as pending + log::debug!( + "(exit) Process {pid} not found in process tree, saving for later" + ); + self.pending_updates.entry(pid).or_default().push(update); + } } - } - TrackerUpdate::SetNewParent { pid, ppid } => { - if let Some(p) = self.processes.get_mut(&pid) { - p.ppid = ppid; - } else { - log::warn!("{ppid} is the new parent of {pid}, but we couldn't find it") + TrackerUpdate::SetNewParent { pid, ppid } => { + if let Some(p) = self.processes.get_mut(&pid) { + p.ppid = ppid; + } else { + log::warn!("{ppid} is the new parent of {pid}, but we couldn't find it") + } } } - } + }) } fn get_info(&self, pid: Pid, ts: Timestamp) -> Result { From 25066a1208edd66a28082a8ccac4a688f4c56d2f Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Mon, 17 Jun 2024 10:18:29 +0200 Subject: [PATCH 2/9] fix: Handle empty `GraphDriver.Data` field in container metadata This field is present only for OverlayFS driver. BTRFS does not use it. 
--- crates/bpf-common/src/containers/mod.rs | 28 +++++++++++++------------ 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs index 7699fef8..ba5fcb7f 100644 --- a/crates/bpf-common/src/containers/mod.rs +++ b/crates/bpf-common/src/containers/mod.rs @@ -152,7 +152,7 @@ struct ImageInspect { #[derive(Debug, Deserialize)] struct GraphDriver { #[serde(rename = "Data")] - data: GraphDriverData, + data: Option, } #[derive(Debug, Deserialize)] @@ -247,19 +247,21 @@ impl ContainerInfo { // Gather all filesystem layer paths. let mut layers = Vec::new(); - if let Some(lower_dirs) = response.graph_driver.data.lower_dir { - for lower_dir in lower_dirs.split(':') { - layers.push(PathBuf::from_str(lower_dir).unwrap()); + if let Some(graph_driver_data) = response.graph_driver.data { + if let Some(lower_dirs) = graph_driver_data.lower_dir { + for lower_dir in lower_dirs.split(':') { + layers.push(PathBuf::from_str(lower_dir).unwrap()); + } + } + if let Some(merged_dir) = graph_driver_data.merged_dir { + layers.push(merged_dir); + } + if let Some(upper_dir) = graph_driver_data.upper_dir { + layers.push(upper_dir); + } + if let Some(work_dir) = graph_driver_data.work_dir { + layers.push(work_dir); } - } - if let Some(merged_dir) = response.graph_driver.data.merged_dir { - layers.push(merged_dir); - } - if let Some(upper_dir) = response.graph_driver.data.upper_dir { - layers.push(upper_dir); - } - if let Some(work_dir) = response.graph_driver.data.work_dir { - layers.push(work_dir); } Ok(Self { From 6ce1b61a5929b7b6a6c81d40fbb32e04862403fd Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Mon, 17 Jun 2024 10:48:29 +0200 Subject: [PATCH 3/9] feat: Detect image layer directory for BTRFS The procedure for BTRFS is more complicated than for OverlayFS. `GraphDriver` doesn't contain straightforward information. Instead, it has to be retrieved with the following steps: 1. 
Using the given image ID, find an "imagedb entry". It's located in `/var/lib/docker/image/btrfs/imagedb/content/sha256/`. 2. Get the list of layer checksums from that entry. 3. For each layer, check whether a "layerdb entry" exists. It's located in `/var/lib/docker/image/btrfs/layerdb/sha256/`. The layerdb directory contains a `cache-id` file. 4. That `cache-id` file contains an ID of a BTRFS subvolume. The subvolume can be found in `/var/lib/docker/btrfs/subvolumes/`. --- crates/bpf-common/src/containers/mod.rs | 81 +++++++++++++++++++++---- 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs index ba5fcb7f..5dc5b8d5 100644 --- a/crates/bpf-common/src/containers/mod.rs +++ b/crates/bpf-common/src/containers/mod.rs @@ -1,7 +1,7 @@ use std::{ ffi::{CStr, OsString}, fmt, - fs::File, + fs::{self, File}, io::{self, BufReader}, mem, os::unix::ffi::OsStringExt, @@ -153,6 +153,8 @@ struct ImageInspect { struct GraphDriver { #[serde(rename = "Data")] data: Option, + #[serde(rename = "Name")] + name: GraphDriverName, } #[derive(Debug, Deserialize)] @@ -167,6 +169,24 @@ struct GraphDriverData { work_dir: Option, } +#[derive(Debug, Deserialize)] +enum GraphDriverName { + #[serde(rename = "btrfs")] + Btrfs, + #[serde(rename = "overlay")] + Overlayfs, +} + +#[derive(Debug, Deserialize)] +struct ImageDbEntry { + rootfs: Rootfs, +} + +#[derive(Debug, Deserialize)] +struct Rootfs { + diff_ids: Vec, +} + /// Container information used in Pulsar alerts and rules. 
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Validatron)] pub struct ContainerInfo { @@ -203,6 +223,9 @@ impl ContainerInfo { async fn from_docker_id(id: String) -> Result { const DOCKER_CONTAINERS_PATH: &str = "/var/lib/docker/containers"; + const DOCKER_IMAGEDB_PATH: &str = "/var/lib/docker/image/btrfs/imagedb/content/sha256/"; + const DOCKER_LAYERDB_PATH: &str = "/var/lib/docker/image/btrfs/layerdb/sha256/"; + const DOCKER_BTRFS_SUBVOL_PATH: &str = "/var/lib/docker/btrfs/subvolumes/"; let path = PathBuf::from(DOCKER_CONTAINERS_PATH) .join(&id) @@ -247,21 +270,53 @@ impl ContainerInfo { // Gather all filesystem layer paths. let mut layers = Vec::new(); - if let Some(graph_driver_data) = response.graph_driver.data { - if let Some(lower_dirs) = graph_driver_data.lower_dir { - for lower_dir in lower_dirs.split(':') { - layers.push(PathBuf::from_str(lower_dir).unwrap()); + + match response.graph_driver.name { + GraphDriverName::Btrfs => { + let path = PathBuf::from(DOCKER_IMAGEDB_PATH).join(&image_id); + let file = File::open(&path).map_err(|source| ContainerError::ReadFile { + source, + path: path.clone(), + })?; + + let reader = BufReader::new(file); + let imagedb_entry: ImageDbEntry = serde_json::from_reader(reader) + .map_err(|source| ContainerError::ParseConfigFile { source, path })?; + + for layer_id in imagedb_entry.rootfs.diff_ids { + let layer_id = layer_id.split(':').last().unwrap(); + + let path = PathBuf::from(DOCKER_LAYERDB_PATH).join(&layer_id); + if path.exists() { + let path = path.join("cache-id"); + let btrfs_subvol_id = fs::read_to_string(&path) + .map_err(|source| ContainerError::ReadFile { source, path })?; + let btrfs_subvol_path = + PathBuf::from(DOCKER_BTRFS_SUBVOL_PATH).join(btrfs_subvol_id); + + layers.push(btrfs_subvol_path); + } } } - if let Some(merged_dir) = graph_driver_data.merged_dir { - layers.push(merged_dir); - } - if let Some(upper_dir) = graph_driver_data.upper_dir { - layers.push(upper_dir); - } - if let 
Some(work_dir) = graph_driver_data.work_dir { - layers.push(work_dir); + GraphDriverName::Overlayfs => { + if let Some(graph_driver_data) = response.graph_driver.data { + if let Some(lower_dirs) = graph_driver_data.lower_dir { + for lower_dir in lower_dirs.split(':') { + layers.push(PathBuf::from_str(lower_dir).unwrap()); + } + } + if let Some(merged_dir) = graph_driver_data.merged_dir { + layers.push(merged_dir); + } + if let Some(upper_dir) = graph_driver_data.upper_dir { + layers.push(upper_dir); + } + if let Some(work_dir) = graph_driver_data.work_dir { + layers.push(work_dir); + } + } } + _ => {} } Ok(Self { From 177e26d6923e5ba2467bfa06f23a44aa5d8b1b6e Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Mon, 17 Jun 2024 11:34:22 +0200 Subject: [PATCH 4/9] style: Move the code related to layers to a separate file --- crates/bpf-common/src/containers/layers.rs | 174 +++++++++++++++++++++ crates/bpf-common/src/containers/mod.rs | 118 +------------- 2 files changed, 178 insertions(+), 114 deletions(-) create mode 100644 crates/bpf-common/src/containers/layers.rs diff --git a/crates/bpf-common/src/containers/layers.rs b/crates/bpf-common/src/containers/layers.rs new file mode 100644 index 00000000..c423106c --- /dev/null +++ b/crates/bpf-common/src/containers/layers.rs @@ -0,0 +1,174 @@ +use std::{ + fmt, + fs::{self, File}, + io::BufReader, + path::PathBuf, + str::FromStr, +}; + +use hyper::{body, Client}; +use hyperlocal::{UnixClientExt, Uri as HyperlocalUri}; +use serde::Deserialize; + +use super::ContainerError; + +const DOCKER_SOCKET: &str = "/var/run/docker.sock"; + +/// Docker API response for `image inspect` request. 
+#[derive(Debug, Deserialize)] +struct ImageInspect { + #[serde(rename = "GraphDriver")] + graph_driver: GraphDriver, +} + +#[derive(Debug, Deserialize)] +struct GraphDriver { + #[serde(rename = "Data")] + data: Option, + #[serde(rename = "Name")] + name: GraphDriverName, +} + +#[derive(Debug, Deserialize)] +struct GraphDriverData { + #[serde(rename = "LowerDir")] + lower_dir: Option, + #[serde(rename = "MergedDir")] + merged_dir: Option, + #[serde(rename = "UpperDir")] + upper_dir: Option, + #[serde(rename = "WorkDir")] + work_dir: Option, +} + +#[derive(Debug, Deserialize)] +enum GraphDriverName { + #[serde(rename = "btrfs")] + Btrfs, + #[serde(rename = "fuse-overlayfs")] + FuseOverlayfs, + #[serde(rename = "overlay2")] + Overlayfs, + #[serde(rename = "vfs")] + Vfs, + #[serde(rename = "zfs")] + Zfs, +} + +impl fmt::Display for GraphDriverName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Btrfs => write!(f, "btrfs"), + Self::FuseOverlayfs => write!(f, "fuse-overlayfs"), + Self::Overlayfs => write!(f, "overlay2"), + Self::Vfs => write!(f, "vfs"), + Self::Zfs => write!(f, "zfs"), + } + } +} + +#[derive(Debug, Deserialize)] +struct ImageDbEntry { + rootfs: Rootfs, +} + +#[derive(Debug, Deserialize)] +struct Rootfs { + diff_ids: Vec, +} + +/// Returns a list of layer paths for the given Docker image ID. 
+pub(crate) async fn docker_layers(image_id: &str) -> Result, ContainerError> { + let client = Client::unix(); + let url = HyperlocalUri::new(DOCKER_SOCKET, &format!("/images/{}/json", image_id)); + + let response = client.get(url.into()).await.unwrap(); + let body_bytes = body::to_bytes(response).await.unwrap(); + + let response: ImageInspect = serde_json::from_slice(&body_bytes).unwrap(); + + match response.graph_driver.name { + GraphDriverName::Btrfs => docker_btrfs_layers(image_id), + GraphDriverName::Overlayfs => docker_overlayfs_layers(response.graph_driver.data), + _ => { + log::warn!( + "Docker graph driver {} is unsupported", + response.graph_driver.name + ); + Ok(Vec::new()) + } + } +} + +/// Returns a list of BTRFS layer paths for the given Docker image ID. +/// +/// The procedure for BTRFS is not straigthforward, since the `image inspect` +/// response doesn't have direct information about layer directories. It +/// consists of the following steps: +/// +/// 1. Using the given image ID, find an "imagedb entry". It's located in +/// `/var/lib/docker/image/btrfs/imagedb/content/sha256/`. +/// 2. Get the list of layer checksums from that entry. +/// 3. For each layer, check whether a "layerdb entry" exists. It's located +/// in `/var/lib/docker/image/btrfs/layerdb/sha256/`. The +/// layerdb directory contains a `cache-id` file. +/// 4. That `cache-id` file contains an ID of a BTRFS subvolume. The +/// subvolume can be found in `/var/lib/docker/btrfs/subvolumes/`. 
+fn docker_btrfs_layers(image_id: &str) -> Result, ContainerError> { + const DOCKER_IMAGEDB_PATH: &str = "/var/lib/docker/image/btrfs/imagedb/content/sha256/"; + const DOCKER_LAYERDB_PATH: &str = "/var/lib/docker/image/btrfs/layerdb/sha256/"; + const DOCKER_BTRFS_SUBVOL_PATH: &str = "/var/lib/docker/btrfs/subvolumes/"; + + let mut layers = Vec::new(); + + let path = PathBuf::from(DOCKER_IMAGEDB_PATH).join(&image_id); + let file = File::open(&path).map_err(|source| ContainerError::ReadFile { + source, + path: path.clone(), + })?; + + let reader = BufReader::new(file); + let imagedb_entry: ImageDbEntry = serde_json::from_reader(reader) + .map_err(|source| ContainerError::ParseConfigFile { source, path })?; + + for layer_id in imagedb_entry.rootfs.diff_ids { + let layer_id = layer_id.split(':').last().unwrap(); + + let path = PathBuf::from(DOCKER_LAYERDB_PATH).join(&layer_id); + if path.exists() { + let path = path.join("cache-id"); + let btrfs_subvol_id = fs::read_to_string(&path) + .map_err(|source| ContainerError::ReadFile { source, path })?; + let btrfs_subvol_path = PathBuf::from(DOCKER_BTRFS_SUBVOL_PATH).join(btrfs_subvol_id); + + layers.push(btrfs_subvol_path); + } + } + + Ok(layers) +} + +fn docker_overlayfs_layers( + graph_driver_data: Option, +) -> Result, ContainerError> { + let mut layers = Vec::new(); + + if let Some(graph_driver_data) = graph_driver_data { + if let Some(lower_dirs) = graph_driver_data.lower_dir { + for lower_dir in lower_dirs.split(':') { + layers.push(PathBuf::from_str(lower_dir).unwrap()); + } + } + if let Some(merged_dir) = graph_driver_data.merged_dir { + layers.push(merged_dir); + } + if let Some(upper_dir) = graph_driver_data.upper_dir { + layers.push(upper_dir); + } + if let Some(work_dir) = graph_driver_data.work_dir { + layers.push(work_dir); + } + } + + Ok(layers) +} diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs index 5dc5b8d5..1a434b0a 100644 --- 
a/crates/bpf-common/src/containers/mod.rs +++ b/crates/bpf-common/src/containers/mod.rs @@ -1,18 +1,15 @@ use std::{ ffi::{CStr, OsString}, fmt, - fs::{self, File}, + fs::File, io::{self, BufReader}, mem, os::unix::ffi::OsStringExt, path::{Path, PathBuf}, ptr, - str::FromStr, }; use diesel::{connection::SimpleConnection, prelude::*}; -use hyper::{body, Client}; -use hyperlocal::{UnixClientExt, Uri as HyperlocalUri}; use ini::Ini; use nix::unistd::Uid; use serde::{Deserialize, Serialize}; @@ -21,6 +18,7 @@ use validatron::Validatron; use crate::parsing::procfs::ProcfsError; +pub mod layers; pub mod schema; #[derive(Error, Debug)] @@ -141,52 +139,6 @@ struct LibpodDBContainerConfig { json: String, } -/// Docker API response for `image inspect` request. -#[derive(Debug, Deserialize)] -struct ImageInspect { - #[serde(rename = "GraphDriver")] - graph_driver: GraphDriver, -} - -/// Data associated with Docker graphdriver. -#[derive(Debug, Deserialize)] -struct GraphDriver { - #[serde(rename = "Data")] - data: Option, - #[serde(rename = "Name")] - name: GraphDriverName, -} - -#[derive(Debug, Deserialize)] -struct GraphDriverData { - #[serde(rename = "LowerDir")] - lower_dir: Option, - #[serde(rename = "MergedDir")] - merged_dir: Option, - #[serde(rename = "UpperDir")] - upper_dir: Option, - #[serde(rename = "WorkDir")] - work_dir: Option, -} - -#[derive(Debug, Deserialize)] -enum GraphDriverName { - #[serde(rename = "btrfs")] - Btrfs, - #[serde(rename = "overlay")] - Overlayfs, -} - -#[derive(Debug, Deserialize)] -struct ImageDbEntry { - rootfs: Rootfs, -} - -#[derive(Debug, Deserialize)] -struct Rootfs { - diff_ids: Vec, -} - /// Container information used in Pulsar alerts and rules. 
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Validatron)] pub struct ContainerInfo { @@ -223,9 +175,6 @@ impl ContainerInfo { async fn from_docker_id(id: String) -> Result { const DOCKER_CONTAINERS_PATH: &str = "/var/lib/docker/containers"; - const DOCKER_IMAGEDB_PATH: &str = "/var/lib/docker/image/btrfs/imagedb/content/sha256/"; - const DOCKER_LAYERDB_PATH: &str = "/var/lib/docker/image/btrfs/layerdb/sha256/"; - const DOCKER_BTRFS_SUBVOL_PATH: &str = "/var/lib/docker/btrfs/subvolumes/"; let path = PathBuf::from(DOCKER_CONTAINERS_PATH) .join(&id) @@ -257,67 +206,8 @@ impl ContainerInfo { // The unprefixed digest is used as an image ID. let image_id = image_digest.split(':').last().unwrap(); - let client = Client::unix(); - let url = HyperlocalUri::new( - "/var/run/docker.sock", - &format!("/images/{}/json", image_id), - ); - - let response = client.get(url.into()).await.unwrap(); - let body_bytes = body::to_bytes(response).await.unwrap(); - - let response: ImageInspect = serde_json::from_slice(&body_bytes).unwrap(); - - // Gather all filesystem layer paths. 
- let mut layers = Vec::new(); - - match response.graph_driver.name { - GraphDriverName::Btrfs => { - let path = PathBuf::from(DOCKER_IMAGEDB_PATH).join(&image_id); - let file = File::open(&path).map_err(|source| ContainerError::ReadFile { - source, - path: path.clone(), - })?; - - let reader = BufReader::new(file); - let imagedb_entry: ImageDbEntry = serde_json::from_reader(reader) - .map_err(|source| ContainerError::ParseConfigFile { source, path })?; - - for layer_id in imagedb_entry.rootfs.diff_ids { - let layer_id = layer_id.split(':').last().unwrap(); - - let path = PathBuf::from(DOCKER_LAYERDB_PATH).join(&layer_id); - if path.exists() { - let path = path.join("cache-id"); - let btrfs_subvol_id = fs::read_to_string(&path) - .map_err(|source| ContainerError::ReadFile { source, path })?; - let btrfs_subvol_path = - PathBuf::from(DOCKER_BTRFS_SUBVOL_PATH).join(btrfs_subvol_id); - - layers.push(btrfs_subvol_path); - } - } - } - GraphDriverName::Overlayfs => { - if let Some(graph_driver_data) = response.graph_driver.data { - if let Some(lower_dirs) = graph_driver_data.lower_dir { - for lower_dir in lower_dirs.split(':') { - layers.push(PathBuf::from_str(lower_dir).unwrap()); - } - } - if let Some(merged_dir) = graph_driver_data.merged_dir { - layers.push(merged_dir); - } - if let Some(upper_dir) = graph_driver_data.upper_dir { - layers.push(upper_dir); - } - if let Some(work_dir) = graph_driver_data.work_dir { - layers.push(work_dir); - } - } - } - _ => {} - } + let layers = layers::docker_layers(image_id).await?; + log::debug!("found layer filesystems for container {id}: {layers:?}"); Ok(Self { id, From 71d2e89cbc0ebaa998cb5e2c3eca0f2d438326c5 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Tue, 18 Jun 2024 15:15:48 +0200 Subject: [PATCH 5/9] style: Define error variants instead of using `unwrap()` --- crates/bpf-common/src/containers/layers.rs | 33 +++++++++++++++++----- crates/bpf-common/src/containers/mod.rs | 27 +++++++++++++++++- 2 files changed, 52 
insertions(+), 8 deletions(-) diff --git a/crates/bpf-common/src/containers/layers.rs b/crates/bpf-common/src/containers/layers.rs index c423106c..ec8f783f 100644 --- a/crates/bpf-common/src/containers/layers.rs +++ b/crates/bpf-common/src/containers/layers.rs @@ -80,12 +80,27 @@ struct Rootfs { /// Returns a list of layer paths for the given Docker image ID. pub(crate) async fn docker_layers(image_id: &str) -> Result, ContainerError> { let client = Client::unix(); - let url = HyperlocalUri::new(DOCKER_SOCKET, &format!("/images/{}/json", image_id)); - - let response = client.get(url.into()).await.unwrap(); - let body_bytes = body::to_bytes(response).await.unwrap(); - - let response: ImageInspect = serde_json::from_slice(&body_bytes).unwrap(); + let uri = HyperlocalUri::new(DOCKER_SOCKET, &format!("/images/{}/json", image_id)); + let uri: hyper::Uri = uri.into(); + + let response = + client + .get(uri.clone()) + .await + .map_err(|source| ContainerError::HyperRequest { + source, + uri: uri.clone(), + })?; + let body_bytes = + body::to_bytes(response) + .await + .map_err(|source| ContainerError::HyperResponse { + source, + uri: uri.clone(), + })?; + + let response: ImageInspect = serde_json::from_slice(&body_bytes) + .map_err(|source| ContainerError::ParseResponse { source, uri })?; match response.graph_driver.name { GraphDriverName::Btrfs => docker_btrfs_layers(image_id), @@ -132,7 +147,10 @@ fn docker_btrfs_layers(image_id: &str) -> Result, ContainerError> { .map_err(|source| ContainerError::ParseConfigFile { source, path })?; for layer_id in imagedb_entry.rootfs.diff_ids { - let layer_id = layer_id.split(':').last().unwrap(); + let layer_id = layer_id + .split(':') + .last() + .ok_or(ContainerError::InvalidLayerID(layer_id.clone()))?; let path = PathBuf::from(DOCKER_LAYERDB_PATH).join(&layer_id); if path.exists() { @@ -156,6 +174,7 @@ fn docker_overlayfs_layers( if let Some(graph_driver_data) = graph_driver_data { if let Some(lower_dirs) = 
graph_driver_data.lower_dir { for lower_dir in lower_dirs.split(':') { + // `PathBuf::from_str` is infallible. layers.push(PathBuf::from_str(lower_dir).unwrap()); } } diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs index 1a434b0a..f34510e9 100644 --- a/crates/bpf-common/src/containers/mod.rs +++ b/crates/bpf-common/src/containers/mod.rs @@ -41,8 +41,26 @@ pub enum ContainerError { source: serde_json::error::Error, path: PathBuf, }, + #[error("parsing response from `{uri:?}` failed")] + ParseResponse { + #[source] + source: serde_json::error::Error, + uri: hyper::Uri, + }, #[error("path `{path}` is non-UTF-8")] PathNonUtf8 { path: PathBuf }, + #[error("failed to make a request to the UNIX socket `{uri:?}`")] + HyperRequest { + #[source] + source: hyper::Error, + uri: hyper::Uri, + }, + #[error("failed to parse a response from the UNIX socket `{uri:?}`")] + HyperResponse { + #[source] + source: hyper::Error, + uri: hyper::Uri, + }, #[error("could not connect to the database `{path:?}`")] SqliteConnection { #[source] @@ -79,6 +97,10 @@ pub enum ContainerError { BoltBucketNotFound(String), #[error("bolt key `{0}` not found")] BoltKeyNotFound(String), + #[error("Invalid layer ID: `{0}`")] + InvalidLayerID(String), + #[error("Invalid image digest: `{0}`")] + InvalidImageDigest(String), } /// A container ID. @@ -204,7 +226,10 @@ impl ContainerInfo { // ``` // // The unprefixed digest is used as an image ID. 
- let image_id = image_digest.split(':').last().unwrap(); + let image_id = image_digest + .split(':') + .last() + .ok_or(ContainerError::InvalidImageDigest(image_digest.clone()))?; let layers = layers::docker_layers(image_id).await?; log::debug!("found layer filesystems for container {id}: {layers:?}"); From cb77d171f77b28c9d41b96a4f538c4a0e0ff5fb9 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Mon, 26 Aug 2024 10:43:29 +0200 Subject: [PATCH 6/9] chore: Fix clippy errors --- crates/bpf-common/src/containers/layers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/bpf-common/src/containers/layers.rs b/crates/bpf-common/src/containers/layers.rs index ec8f783f..4003a029 100644 --- a/crates/bpf-common/src/containers/layers.rs +++ b/crates/bpf-common/src/containers/layers.rs @@ -136,7 +136,7 @@ fn docker_btrfs_layers(image_id: &str) -> Result, ContainerError> { let mut layers = Vec::new(); - let path = PathBuf::from(DOCKER_IMAGEDB_PATH).join(&image_id); + let path = PathBuf::from(DOCKER_IMAGEDB_PATH).join(image_id); let file = File::open(&path).map_err(|source| ContainerError::ReadFile { source, path: path.clone(), @@ -152,7 +152,7 @@ fn docker_btrfs_layers(image_id: &str) -> Result, ContainerError> { .last() .ok_or(ContainerError::InvalidLayerID(layer_id.clone()))?; - let path = PathBuf::from(DOCKER_LAYERDB_PATH).join(&layer_id); + let path = PathBuf::from(DOCKER_LAYERDB_PATH).join(layer_id); if path.exists() { let path = path.join("cache-id"); let btrfs_subvol_id = fs::read_to_string(&path) From cefaa9ec75a35b23ff9b5eb631c87e41e58dcc49 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Sun, 7 Jul 2024 14:40:55 +0200 Subject: [PATCH 7/9] feat: Detect image layer directory for Podman Use libpod's image store and layer store to determine image layer directories on the host for Podman. 
---
 crates/bpf-common/src/containers/layers.rs | 122 ++++++++++++++++++++-
 crates/bpf-common/src/containers/mod.rs    |  15 ++-
 2 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/crates/bpf-common/src/containers/layers.rs b/crates/bpf-common/src/containers/layers.rs
index 4003a029..6ceb8271 100644
--- a/crates/bpf-common/src/containers/layers.rs
+++ b/crates/bpf-common/src/containers/layers.rs
@@ -2,12 +2,13 @@ use std::{
     fmt,
     fs::{self, File},
     io::BufReader,
-    path::PathBuf,
+    path::{Path, PathBuf},
     str::FromStr,
 };
 
 use hyper::{body, Client};
 use hyperlocal::{UnixClientExt, Uri as HyperlocalUri};
+use nix::unistd::Uid;
 use serde::Deserialize;
 
 use super::ContainerError;
@@ -55,6 +56,12 @@ enum GraphDriverName {
     Zfs,
 }
 
+#[derive(Debug, Deserialize)]
+struct LibpodLayer {
+    id: String,
+    parent: Option<String>,
+}
+
 impl fmt::Display for GraphDriverName {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
@@ -191,3 +198,116 @@ fn docker_overlayfs_layers(
 
     Ok(layers)
 }
+
+/// For the layer identified by `top_layer_id`, return the directories of that
+/// layer and of all its parents. In other words, return the directories of all
+/// layers associated with an image.
+pub(crate) fn podman_layers<P: AsRef<Path>>(
+    top_layer_id: &str,
+    uid: Uid,
+    user_home: P,
+) -> Result<Vec<PathBuf>, ContainerError> {
+    let layer_store_path =
+        find_layer_store(uid, &user_home).ok_or(ContainerError::LayerStoreNotFound)?;
+    let layer_store_file =
+        File::open(&layer_store_path).map_err(|source| ContainerError::ReadFile {
+            source,
+            path: layer_store_path.clone(),
+        })?;
+    let reader = BufReader::new(layer_store_file);
+
+    let overlay_dir =
+        find_overlay_dir(uid, &user_home).ok_or(ContainerError::OverlayDirNotFound)?;
+
+    let mut layers = Vec::new(); // Filled by the loop below, top layer first.
+    let config_layers: Vec<LibpodLayer> =
+        serde_json::from_reader(reader).map_err(|source| ContainerError::ParseConfigFile {
+            source,
+            path: layer_store_path.clone(),
+        })?;
+    let mut layer_id = top_layer_id;
+    let mut limit = config_layers.len();
+    loop {
+        // Search from the end; `rposition` still yields an index into `config_layers`.
+        let pos = config_layers[..limit]
+            .iter()
+            .rposition(|layer| layer.id == layer_id)
+            .ok_or_else(|| ContainerError::LayerNotFound(layer_id.to_string()))?;
+        let layer = &config_layers[pos];
+        layers.extend(find_subdirs(overlay_dir.join(&layer.id)));
+        match layer.parent {
+            Some(ref parent) => {
+                layer_id = parent;
+                limit = pos;
+            }
+            None => break,
+        }
+    }
+
+    Ok(layers)
+}
+
+fn find_layer_store<P: AsRef<Path>>(uid: Uid, user_home: P) -> Option<PathBuf> {
+    const LIBPOD_LAYER_STORE_PATH: &str = "/var/lib/containers/storage/overlay-layers/layers.json";
+
+    let layer_store_path = if uid.is_root() {
+        PathBuf::from(LIBPOD_LAYER_STORE_PATH)
+    } else {
+        user_home
+            .as_ref()
+            .join(".local")
+            .join("share")
+            .join("containers")
+            .join("storage")
+            .join("overlay-layers")
+            .join("layers.json")
+    };
+
+    if !layer_store_path.exists() {
+        return None;
+    }
+
+    Some(layer_store_path)
+}
+
+fn find_overlay_dir<P: AsRef<Path>>(uid: Uid, user_home: P) -> Option<PathBuf> {
+    const OVERLAY_PATH: &str = "/var/lib/containers/storage/overlay";
+
+    let overlay_dir = if uid.is_root() {
+        PathBuf::from(OVERLAY_PATH)
+    } else {
+        user_home
+            .as_ref()
+            .join(".local")
+            .join("share")
+            .join("containers")
+            .join("storage")
+            .join("overlay")
+    };
+
+    if !overlay_dir.exists() {
+        return None;
+    }
+
+    Some(overlay_dir)
+}
+
+/// Returns all subdirectories of the given `parent_path`.
+fn find_subdirs<P: AsRef<Path>>(parent_path: P) -> Vec<PathBuf> {
+    let mut subdirectories = Vec::new();
+
+    if parent_path.as_ref().is_dir() {
+        if let Ok(entries) = fs::read_dir(parent_path) {
+            for entry in entries {
+                if let Ok(entry) = entry {
+                    let path = entry.path();
+                    if path.is_dir() {
+                        subdirectories.push(path);
+                    }
+                }
+            }
+        }
+    }
+
+    subdirectories
+}
diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs
index f34510e9..428a817b 100644
--- a/crates/bpf-common/src/containers/mod.rs
+++ b/crates/bpf-common/src/containers/mod.rs
@@ -71,6 +71,8 @@ pub enum ContainerError {
     ContainerNotFound { id: String },
     #[error("could not find libpod image store")]
     ImageStoreNotFound,
+    #[error("could not find libpod layer store")]
+    LayerStoreNotFound,
     #[error("could not find container image `{id}` in `{path:?}`")]
     ImageNotFound { id: String, path: PathBuf },
     #[error("parsing image digest {digest} failed")]
@@ -101,6 +103,10 @@ pub enum ContainerError {
     InvalidLayerID(String),
     #[error("Invalid image digest: `{0}`")]
     InvalidImageDigest(String),
+    #[error("layer {0} not found in the layer store")]
+    LayerNotFound(String),
+    #[error("could not find overlay directory")]
+    OverlayDirNotFound,
 }
 
 /// A container ID.
@@ -151,6 +157,7 @@ struct LibpodConfig {
 struct LibpodImageConfig {
     id: String,
     digest: String,
+    layer: String,
 }
 
 /// Database schema of libpod.
@@ -297,18 +304,20 @@ impl ContainerInfo { let image = images.iter().find(|image| image.id == image_id).ok_or( ContainerError::ImageNotFound { - id: image_id, + id: image_id.clone(), path: image_store_path, }, )?; + let layers = layers::podman_layers(&image.layer, uid, user_home)?; + log::debug!("found layer filesystems for container {id}: {layers:?}"); + Ok(Self { id, name: config.name, image: config.rootfs_image_name, image_digest: image.digest.clone(), - // TODO(vadorovsky): Parse layer information in Podman. - layers: Vec::new(), + layers, }) } } From abccc00051ce0babccc507bac9c94a6321b0e8e8 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Mon, 29 Jul 2024 08:50:15 +0200 Subject: [PATCH 8/9] chore: Remove unnecessary `clone()` --- crates/bpf-common/src/containers/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bpf-common/src/containers/mod.rs b/crates/bpf-common/src/containers/mod.rs index 428a817b..c1a9a808 100644 --- a/crates/bpf-common/src/containers/mod.rs +++ b/crates/bpf-common/src/containers/mod.rs @@ -304,7 +304,7 @@ impl ContainerInfo { let image = images.iter().find(|image| image.id == image_id).ok_or( ContainerError::ImageNotFound { - id: image_id.clone(), + id: image_id, path: image_store_path, }, )?; From b874cde4400543812c15f006d436913f8fdef7a6 Mon Sep 17 00:00:00 2001 From: Michal Rostecki Date: Wed, 11 Sep 2024 14:26:40 +0000 Subject: [PATCH 9/9] chore: Fix clippy error --- crates/bpf-common/src/containers/layers.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/bpf-common/src/containers/layers.rs b/crates/bpf-common/src/containers/layers.rs index 6ceb8271..ba5adb06 100644 --- a/crates/bpf-common/src/containers/layers.rs +++ b/crates/bpf-common/src/containers/layers.rs @@ -298,12 +298,10 @@ fn find_subdirs>(parent_path: P) -> Vec { if parent_path.as_ref().is_dir() { if let Ok(entries) = fs::read_dir(parent_path) { - for entry in entries { - if let Ok(entry) = entry { - let 
path = entry.path(); - if path.is_dir() { - subdirectories.push(path); - } + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + subdirectories.push(path); } } }