Skip to content

Commit

Permalink
Implement FD remapping
Browse files Browse the repository at this point in the history
  • Loading branch information
aidanhs committed Dec 7, 2024
1 parent e4ec29c commit 43c8664
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 13 deletions.
30 changes: 27 additions & 3 deletions crates/libcontainer/src/container/builder.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::os::fd::OwnedFd;
use std::os::fd::{OwnedFd, RawFd};
use std::path::PathBuf;

use super::init_builder::InitContainerBuilder;
Expand All @@ -20,8 +20,10 @@ pub struct ContainerBuilder {
pub(super) pid_file: Option<PathBuf>,
/// Socket to communicate the file descriptor of the ptty
pub(super) console_socket: Option<PathBuf>,
/// File descriptors to be passed into the container process
/// Number of file descriptors to be passed into the container process
pub(super) preserve_fds: i32,
/// File descriptors to be remapped into the container process
pub(super) remap_fds: Vec<(RawFd, RawFd)>,
/// The function that actually runs on the container init process. Default
/// is to execute the specified command in the oci spec.
pub(super) executor: Box<dyn Executor>,
Expand Down Expand Up @@ -76,6 +78,7 @@ impl ContainerBuilder {
pid_file: None,
console_socket: None,
preserve_fds: 0,
remap_fds: vec![],
executor: workload::default::get_executor(),
stdin: None,
stdout: None,
Expand Down Expand Up @@ -231,7 +234,7 @@ impl ContainerBuilder {
}

/// Sets the number of additional file descriptors which will be passed into
/// the container process.
/// the container process, over and above stdio (0, 1, 2).
/// # Example
///
/// ```no_run
Expand All @@ -242,13 +245,34 @@ impl ContainerBuilder {
/// "74f1a4cb3801".to_owned(),
/// SyscallType::default(),
/// )
/// // Will pass FDs <= 7
/// .with_preserved_fds(5);
/// ```
pub fn with_preserved_fds(mut self, preserved_fds: i32) -> Self {
self.preserve_fds = preserved_fds;
self
}

/// Sets a list of file descriptors that will be mapped and passed into
/// the container process (ignoring any limits on preserved fds)
/// # Example
///
/// ```no_run
/// # use libcontainer::container::builder::ContainerBuilder;
/// # use libcontainer::syscall::syscall::SyscallType;
///
/// ContainerBuilder::new(
/// "74f1a4cb3801".to_owned(),
/// SyscallType::default(),
/// )
/// // Overwrites stderr with FD 15, and explicitly passes FD 10
/// .with_remapped_fds(vec![(15, 2), (10, 10)]);
/// ```
pub fn with_remapped_fds(mut self, remapped_fds: Vec<(RawFd, RawFd)>) -> Self {
self.remap_fds = remapped_fds;
self
}

/// Sets the function that actually runs on the container init process.
/// # Example
///
Expand Down
5 changes: 4 additions & 1 deletion crates/libcontainer/src/container/builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ pub(super) struct ContainerBuilderImpl {
pub user_ns_config: Option<UserNamespaceConfig>,
/// Path to the Unix Domain Socket to communicate container start
pub notify_path: PathBuf,
/// File descriptos preserved/passed to the container init process.
/// Number of file descriptors preserved/passed to the container init process.
pub preserve_fds: i32,
/// File descriptors remapped into the container process
pub remap_fds: Vec<(RawFd, RawFd)>,

Check failure on line 45 in crates/libcontainer/src/container/builder_impl.rs

View workflow job for this annotation

GitHub Actions / tests (x86_64, gnu)

cannot find type `RawFd` in this scope

Check failure on line 45 in crates/libcontainer/src/container/builder_impl.rs

View workflow job for this annotation

GitHub Actions / tests (x86_64, gnu)

cannot find type `RawFd` in this scope

Check failure on line 45 in crates/libcontainer/src/container/builder_impl.rs

View workflow job for this annotation

GitHub Actions / tests (x86_64, musl)

cannot find type `RawFd` in this scope

Check failure on line 45 in crates/libcontainer/src/container/builder_impl.rs

View workflow job for this annotation

GitHub Actions / tests (x86_64, musl)

cannot find type `RawFd` in this scope
/// If the container is to be run in detached mode
pub detached: bool,
/// Default executes the specified execution of a generic command
Expand Down Expand Up @@ -146,6 +148,7 @@ impl ContainerBuilderImpl {
console_socket: self.console_socket.as_ref().map(|c| c.as_raw_fd()),
notify_listener,
preserve_fds: self.preserve_fds,
remap_fds: self.remap_fds.clone(),
user_ns_config: self.user_ns_config.to_owned(),
cgroup_config: self.cgroup_config.clone(),
detached: self.detached,
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ impl InitContainerBuilder {
user_ns_config,
notify_path,
preserve_fds: self.base.preserve_fds,
remap_fds: self.base.remap_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: self.no_pivot,
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/tenant_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ impl TenantContainerBuilder {
user_ns_config,
notify_path: notify_path.clone(),
preserve_fds: self.base.preserve_fds,
remap_fds: self.base.remap_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: false,
Expand Down
2 changes: 2 additions & 0 deletions crates/libcontainer/src/process/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ pub struct ContainerArgs {
pub notify_listener: NotifyListener,
/// File descriptors preserved/passed to the container init process.
pub preserve_fds: i32,
/// File descriptors preserved/passed to the container init process.
pub remap_fds: Vec<(RawFd, RawFd)>,
/// Options for new namespace creation
pub user_ns_config: Option<UserNamespaceConfig>,
/// Cgroup Manager Config
Expand Down
18 changes: 14 additions & 4 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,11 @@ pub fn container_init_process(
}
}

if proc.args().is_none() {
tracing::error!("on non-Windows, at least one process arg entry is required");
Err(MissingSpecError::Args)?;
}

args.executor.validate(spec)?;
args.executor.setup_envs(envs)?;

Expand Down Expand Up @@ -702,10 +707,15 @@ pub fn container_init_process(
}
}

if proc.args().is_none() {
tracing::error!("on non-Windows, at least one process arg entry is required");
Err(MissingSpecError::Args)?;
}
// Remap any FDs the user wants to pass to the container. This has to happen when
// no other FDs at all are open, otherwise the user could accidentally clobber
// an FD that's in use by youki. Unfortunately this can conflict with seccomp -
// the way to fix this would be to relocate in-use FDs out of the way of user
// requested ones, but that is quite hard.
syscall.remap_passed_fds(&args.remap_fds).map_err(|err| {
tracing::error!(?err, "failed to remap fds to be passed");
InitProcessError::SyscallOther(err)
})?;

args.executor.exec(spec).map_err(|err| {
tracing::error!(?err, "failed to execute payload");
Expand Down
42 changes: 38 additions & 4 deletions crates/libcontainer/src/syscall/linux.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
//! Implements Command trait for Linux systems
use std::any::Any;
use std::ffi::{CStr, CString, OsStr};
use std::os::fd::BorrowedFd;
use std::os::fd::{BorrowedFd, RawFd};
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::symlink;
use std::os::unix::io::RawFd;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
Expand All @@ -17,7 +16,7 @@ use nix::fcntl::{open, OFlag};
use nix::mount::{mount, umount2, MntFlags, MsFlags};
use nix::sched::{unshare, CloneFlags};
use nix::sys::stat::{mknod, Mode, SFlag};
use nix::unistd::{chown, chroot, close, fchdir, pivot_root, sethostname, Gid, Uid};
use nix::unistd::{chown, chroot, close, dup2, fchdir, pivot_root, sethostname, Gid, Uid};
use oci_spec::runtime::PosixRlimit;

use super::{Result, Syscall, SyscallError};
Expand Down Expand Up @@ -179,7 +178,13 @@ impl LinuxSyscall {
to_be_cleaned_up_fds.iter().for_each(|&fd| {
// Intentionally ignore errors here -- the cases where this might fail
// are basically file descriptors that have already been closed.
let _ = fcntl::fcntl(fd, fcntl::F_SETFD(fcntl::FdFlag::FD_CLOEXEC));
match fcntl::fcntl(fd, fcntl::F_GETFD) {
Ok(flags) => {

Check warning on line 182 in crates/libcontainer/src/syscall/linux.rs

View workflow job for this annotation

GitHub Actions / check (x86_64, gnu)

Diff in /home/runner/work/youki/youki/crates/libcontainer/src/syscall/linux.rs

Check warning on line 182 in crates/libcontainer/src/syscall/linux.rs

View workflow job for this annotation

GitHub Actions / check (x86_64, musl)

Diff in /home/runner/work/youki/youki/crates/libcontainer/src/syscall/linux.rs

Check warning on line 182 in crates/libcontainer/src/syscall/linux.rs

View workflow job for this annotation

GitHub Actions / check (aarch64, gnu)

Diff in /home/runner/work/youki/youki/crates/libcontainer/src/syscall/linux.rs

Check warning on line 182 in crates/libcontainer/src/syscall/linux.rs

View workflow job for this annotation

GitHub Actions / check (aarch64, musl)

Diff in /home/runner/work/youki/youki/crates/libcontainer/src/syscall/linux.rs
let flags = fcntl::FdFlag::from_bits_retain(flags) & fcntl::FdFlag::FD_CLOEXEC;
let _ = fcntl::fcntl(fd, fcntl::F_SETFD(flags));
},
Err(_) => (),
}
});

Ok(())
Expand Down Expand Up @@ -527,6 +532,35 @@ impl Syscall for LinuxSyscall {
Ok(())
}

// Remap FDs and clear the cloexec flag if set
fn remap_passed_fds(&self, remap_fds: &[(RawFd, RawFd)]) -> Result<()> {
for &(oldfd, newfd) in remap_fds {
if oldfd != newfd {
// dup2 clears cloexec
dup2(oldfd, newfd).map_err(|errno| {
tracing::error!(?errno, ?oldfd, ?newfd, "failed to remap fd");
errno
})?;
close(oldfd).map_err(|errno| {
tracing::error!(?errno, ?oldfd, "failed to close fd");
errno
})?;
} else {
// dup2 is a no-op if the FDs match, so just clear cloexec
let flags = fcntl::fcntl(oldfd, fcntl::F_GETFD).map_err(|errno| {
tracing::error!(?errno, ?oldfd, "failed to get fd flags");
errno
})?;
let flags = fcntl::FdFlag::from_bits_retain(flags) & !fcntl::FdFlag::FD_CLOEXEC;
fcntl::fcntl(oldfd, fcntl::F_SETFD(flags)).map_err(|errno| {
tracing::error!(?errno, ?oldfd, "failed to clear cloexec");
errno
})?;
}
}
Ok(())
}

fn mount_setattr(
&self,
dirfd: RawFd,
Expand Down
4 changes: 3 additions & 1 deletion crates/libcontainer/src/syscall/syscall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! implementation details
use std::any::Any;
use std::ffi::OsStr;
use std::os::fd::RawFd;
use std::path::Path;
use std::sync::Arc;

Expand All @@ -24,7 +25,7 @@ pub trait Syscall {
fn as_any(&self) -> &dyn Any;
fn pivot_rootfs(&self, path: &Path) -> Result<()>;
fn chroot(&self, path: &Path) -> Result<()>;
fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()>;
fn set_ns(&self, rawfd: RawFd, nstype: CloneFlags) -> Result<()>;
fn set_id(&self, uid: Uid, gid: Gid) -> Result<()>;
fn unshare(&self, flags: CloneFlags) -> Result<()>;
fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<()>;
Expand All @@ -45,6 +46,7 @@ pub trait Syscall {
fn chown(&self, path: &Path, owner: Option<Uid>, group: Option<Gid>) -> Result<()>;
fn set_groups(&self, groups: &[Gid]) -> Result<()>;
fn close_range(&self, preserve_fds: i32) -> Result<()>;
fn remap_passed_fds(&self, remap_fds: &[(RawFd, RawFd)]) -> Result<()>;
fn mount_setattr(
&self,
dirfd: i32,
Expand Down
5 changes: 5 additions & 0 deletions crates/libcontainer/src/syscall/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::any::Any;
use std::cell::{Ref, RefCell, RefMut};
use std::collections::HashMap;
use std::ffi::{OsStr, OsString};
use std::os::fd::RawFd;
use std::path::{Path, PathBuf};
use std::sync::Arc;

Expand Down Expand Up @@ -249,6 +250,10 @@ impl Syscall for TestHelperSyscall {
todo!()
}

fn remap_passed_fds(&self, _: &[(RawFd, RawFd)]) -> Result<()> {
todo!()
}

fn mount_setattr(
&self,
_: i32,
Expand Down

0 comments on commit 43c8664

Please sign in to comment.