diff --git a/Cargo.lock b/Cargo.lock index add817825..078a6f90d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2008,6 +2008,7 @@ dependencies = [ "regex", "rust-criu", "safe-path", + "scopeguard", "serde", "serde_json", "serial_test", diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index eadaa0fd6..0f6c677d9 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -59,3 +59,4 @@ serial_test = "3.1.1" tempfile = "3" anyhow = "1.0" rand = { version = "0.8.5" } +scopeguard = "1" diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index 9c185978e..8d8494bd7 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -57,6 +57,8 @@ pub(super) struct ContainerBuilderImpl { pub stdout: Option, // RawFd set to stderr of the container init process. pub stderr: Option, + // Indicates whether the init process should be a sibling of the main process. 
+ pub as_sibling: bool, } impl ContainerBuilderImpl { @@ -172,6 +174,7 @@ impl ContainerBuilderImpl { stdin: self.stdin.as_ref().map(|x| x.as_raw_fd()), stdout: self.stdout.as_ref().map(|x| x.as_raw_fd()), stderr: self.stderr.as_ref().map(|x| x.as_raw_fd()), + as_sibling: self.as_sibling, }; let (init_pid, need_to_clean_up_intel_rdt_dir) = diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index 4ff2094ed..86e728802 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -21,6 +21,7 @@ pub struct InitContainerBuilder { use_systemd: bool, detached: bool, no_pivot: bool, + as_sibling: bool, } impl InitContainerBuilder { @@ -33,6 +34,7 @@ impl InitContainerBuilder { use_systemd: true, detached: true, no_pivot: false, + as_sibling: false, } } @@ -42,6 +44,13 @@ impl InitContainerBuilder { self } + /// Sets whether the init process should be run as a child or a sibling of + /// the calling process. + pub fn as_sibling(mut self, as_sibling: bool) -> Self { + self.as_sibling = as_sibling; + self + } + pub fn with_detach(mut self, detached: bool) -> Self { self.detached = detached; self @@ -106,6 +115,7 @@ impl InitContainerBuilder { stdin: self.base.stdin, stdout: self.base.stdout, stderr: self.base.stderr, + as_sibling: self.as_sibling, }; builder_impl.create()?; diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index 22b7eff2a..845c1af3d 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -43,6 +43,7 @@ pub struct TenantContainerBuilder { capabilities: Vec, process: Option, detached: bool, + as_sibling: bool, } impl TenantContainerBuilder { @@ -59,6 +60,7 @@ impl TenantContainerBuilder { capabilities: Vec::new(), process: None, detached: false, + as_sibling: false, } } @@ -95,6 +97,13 @@ impl 
TenantContainerBuilder { self } + /// Sets whether the init process should be run as a child or a sibling of + /// the calling process. + pub fn as_sibling(mut self, as_sibling: bool) -> Self { + self.as_sibling = as_sibling; + self + } + pub fn with_detach(mut self, detached: bool) -> Self { self.detached = detached; self @@ -145,6 +154,7 @@ impl TenantContainerBuilder { stdin: self.base.stdin, stdout: self.base.stdout, stderr: self.base.stderr, + as_sibling: self.as_sibling, }; let pid = builder_impl.create()?; diff --git a/crates/libcontainer/src/process/args.rs b/crates/libcontainer/src/process/args.rs index 2ea0dc974..4e7b1ca89 100644 --- a/crates/libcontainer/src/process/args.rs +++ b/crates/libcontainer/src/process/args.rs @@ -50,4 +50,6 @@ pub struct ContainerArgs { pub stdout: Option, // RawFd set to stderr of the container init process. pub stderr: Option, + // Indicates whether the init process should be a sibling of the main process. + pub as_sibling: bool, } diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index fd5dfcb1c..a900f8db5 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -84,7 +84,13 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo ProcessError::SyscallOther(err) })?; - let intermediate_pid = fork::container_clone(cb).map_err(|err| { + let container_clone_fn = if container_args.as_sibling { + fork::container_clone_sibling + } else { + fork::container_clone + }; + + let intermediate_pid = container_clone_fn(cb).map_err(|err| { tracing::error!("failed to fork intermediate process: {}", err); ProcessError::IntermediateProcessFailed(err) })?; diff --git a/crates/libcontainer/tests/as_sibling.rs b/crates/libcontainer/tests/as_sibling.rs new file mode 100644 index 000000000..570f2b932 --- /dev/null +++ b/crates/libcontainer/tests/as_sibling.rs @@ -0,0 +1,115 
@@ +use std::collections::HashMap; +use std::fs::create_dir; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::path::Path; + +use anyhow::Result; +use libcontainer::container::builder::ContainerBuilder; +use libcontainer::syscall::syscall::SyscallType; +use libcontainer::workload::{ + Executor, ExecutorError, ExecutorSetEnvsError, ExecutorValidationError, +}; +use nix::unistd::{getegid, geteuid}; +use oci_spec::runtime::{RootBuilder, Spec}; +use procfs::process::Process; +use serial_test::serial; +use tempfile::tempdir; + +fn prepare_container_root(root: impl AsRef<Path>) -> Result<()> { + let root = root.as_ref(); + create_dir(root.join("rootfs"))?; + + let uid = geteuid().as_raw(); + let gid = getegid().as_raw(); + + let mut spec = Spec::rootless(uid, gid); + spec.set_root( + RootBuilder::default() + .path("rootfs") + .readonly(false) + .build() + .ok(), + ); + + spec.save(root.join("config.json"))?; + + Ok(()) +} + +fn hash(v: impl Hash) -> u64 { + let mut hasher = DefaultHasher::default(); + v.hash(&mut hasher); + hasher.finish() +} + +#[derive(Clone)] +struct SomeExecutor; + +impl Executor for SomeExecutor { + fn setup_envs(&self, _: HashMap<String, String>) -> Result<(), ExecutorSetEnvsError> { + Ok(()) + } + + fn validate(&self, _: &Spec) -> Result<(), ExecutorValidationError> { + Ok(()) + } + + fn exec(&self, _: &Spec) -> Result<(), ExecutorError> { + Ok(()) + } +} + +#[test] +#[serial] +fn run_init_process_as_child() -> Result<()> { + let root = tempdir()?; + prepare_container_root(&root)?; + + let id = format!("test-container-{:x}", hash(root.as_ref())); + let container = ContainerBuilder::new(id, SyscallType::Linux) + .with_executor(SomeExecutor) + .with_root_path(root.as_ref())? 
+ .as_init(root.as_ref()) + .build()?; + + let container = scopeguard::guard(container, |mut container| { + let _ = container.delete(true); + }); + + let init_pid = container.pid().unwrap().as_raw(); + + let init_ppid = Process::new(init_pid)?.stat()?.ppid; + let this_pid = Process::myself()?.pid(); + + assert_eq!(init_ppid, this_pid); + + Ok(()) +} + +#[test] +#[serial] +fn run_init_process_as_sibling() -> Result<()> { + let root = tempdir()?; + prepare_container_root(&root)?; + + let id = format!("test-container-{:x}", hash(root.as_ref())); + let container = ContainerBuilder::new(id, SyscallType::Linux) + .with_executor(SomeExecutor) + .with_root_path(root.as_ref())? + .as_init(root.as_ref()) + .as_sibling(true) + .build()?; + + let container = scopeguard::guard(container, |mut container| { + let _ = container.delete(true); + }); + + let init_pid = container.pid().unwrap().as_raw(); + + let init_ppid = Process::new(init_pid)?.stat()?.ppid; + let this_ppid = Process::myself()?.stat()?.ppid; + + assert_eq!(init_ppid, this_ppid); + + Ok(()) +}