diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 12ee6a3ef95..b1efa1ab5de 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -346,6 +346,18 @@ func (c *Container) start(process *Process) (retErr error) { if err := utils.CloseExecFrom(3); err != nil { return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err) } + + // If the root in the container is different from current root user, we + // need to change the owner of stdio before we enter the user namespace, + // or else we may can't access stdio in the container. + // Please see https://github.com/opencontainers/runc/issues/4475 + containerRootUid, err := c.config.HostRootUID() + if err != nil { + return fmt.Errorf("unable to get root uid of the container: %w", err) + } + if err := fixStdioPermissions(containerRootUid); err != nil { + return fmt.Errorf("unable to change permission of stdio: %w", err) + } if err := parent.start(); err != nil { return fmt.Errorf("unable to start container process: %w", err) } @@ -506,6 +518,48 @@ func isDmzBinarySafe(c *configs.Config) bool { return false } +// fixStdioPermissions fixes the permissions of STDIO to the specified user. +func fixStdioPermissions(uid int) error { + var null unix.Stat_t + if err := unix.Stat("/dev/null", &null); err != nil { + return &os.PathError{Op: "stat", Path: "/dev/null", Err: err} + } + for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + var s unix.Stat_t + if err := unix.Fstat(int(file.Fd()), &s); err != nil { + return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} + } + + // Skip chown if uid is already the one we want or any of the STDIO descriptors + // were redirected to /dev/null. + if int(s.Uid) == uid || s.Rdev == null.Rdev { + continue + } + + // We only change the uid (as it is possible for the mount to + // prefer a different gid, and there's no reason for us to change it). + // The reason why we don't just leave the default uid=X mount setup is + // that users expect to be able to actually use their console. Without + // this code, you couldn't effectively run as a non-root user inside a + // container and also have a console set up. + if err := file.Chown(uid, int(s.Gid)); err != nil { + // If we've hit an EINVAL then s.Gid isn't mapped in the user + // namespace. If we've hit an EPERM then the inode's current owner + // is not mapped in our user namespace (in particular, + // privileged_wrt_inode_uidgid() has failed). Read-only + // /dev can result in EROFS error. In any case, it's + // better for us to just not touch the stdio rather + // than bail at this point. + + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) { + continue + } + return err + } + } + return nil +} + func (c *Container) newParentProcess(p *Process) (parentProcess, error) { comm, err := newProcessComm() if err != nil { diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 176a4cdc12b..1e81c1729d5 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -516,7 +516,7 @@ func setupUser(config *initConfig) error { // Before we change to the container's user make sure that the processes // STDIO is correctly owned by the user that we are switching to. - if err := fixStdioPermissions(execUser); err != nil { + if err := fixStdioPermissions(execUser.Uid); err != nil { return err } @@ -563,50 +563,6 @@ func setupUser(config *initConfig) error { return nil } -// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. -// The ownership needs to match because it is created outside of the container and needs to be -// localized. -func fixStdioPermissions(u *user.ExecUser) error { - var null unix.Stat_t - if err := unix.Stat("/dev/null", &null); err != nil { - return &os.PathError{Op: "stat", Path: "/dev/null", Err: err} - } - for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { - var s unix.Stat_t - if err := unix.Fstat(int(file.Fd()), &s); err != nil { - return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} - } - - // Skip chown if uid is already the one we want or any of the STDIO descriptors - // were redirected to /dev/null. - if int(s.Uid) == u.Uid || s.Rdev == null.Rdev { - continue - } - - // We only change the uid (as it is possible for the mount to - // prefer a different gid, and there's no reason for us to change it). - // The reason why we don't just leave the default uid=X mount setup is - // that users expect to be able to actually use their console. Without - // this code, you couldn't effectively run as a non-root user inside a - // container and also have a console set up. - if err := file.Chown(u.Uid, int(s.Gid)); err != nil { - // If we've hit an EINVAL then s.Gid isn't mapped in the user - // namespace. If we've hit an EPERM then the inode's current owner - // is not mapped in our user namespace (in particular, - // privileged_wrt_inode_uidgid() has failed). Read-only - // /dev can result in EROFS error. In any case, it's - // better for us to just not touch the stdio rather - // than bail at this point. - - if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) { - continue - } - return err - } - } - return nil -} - // setupNetwork sets up and initializes any network interface inside the container. func setupNetwork(config *initConfig) error { for _, config := range config.Networks {