Skip to content

Commit 0a0b6a7

Browse files
authored
Merge pull request #1174 from elezar/no-pivot-root
Allow update-ldcache hook to work when pivot-root is not supported
2 parents d08196f + 241743c commit 0a0b6a7

File tree

105 files changed

+13395
-6
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+13395
-6
lines changed

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ require (
66
github.com/NVIDIA/go-nvlib v0.8.1
77
github.com/NVIDIA/go-nvml v0.13.0-1
88
github.com/google/uuid v1.6.0
9+
github.com/moby/sys/mountinfo v0.7.2
910
github.com/moby/sys/reexec v0.1.0
1011
github.com/moby/sys/symlink v0.3.0
1112
github.com/opencontainers/runc v1.3.3
1213
github.com/opencontainers/runtime-spec v1.3.0
1314
github.com/pelletier/go-toml v1.9.5
15+
github.com/prometheus/procfs v0.19.2
1416
github.com/sirupsen/logrus v1.9.3
1517
github.com/stretchr/testify v1.11.1
1618
github.com/urfave/cli-altsrc/v3 v3.1.0
@@ -26,7 +28,6 @@ require (
2628
github.com/cyphar/filepath-securejoin v0.6.0 // indirect
2729
github.com/davecgh/go-spew v1.1.1 // indirect
2830
github.com/fsnotify/fsnotify v1.7.0 // indirect
29-
github.com/google/go-cmp v0.6.0 // indirect
3031
github.com/hashicorp/errwrap v1.1.0 // indirect
3132
github.com/kr/pretty v0.3.1 // indirect
3233
github.com/moby/sys/capability v0.4.0 // indirect

go.sum

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
1515
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
1616
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
1717
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
18-
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
19-
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
18+
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
19+
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
2020
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
2121
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
2222
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
@@ -32,6 +32,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
3232
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
3333
github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk=
3434
github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I=
35+
github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg=
36+
github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4=
3537
github.com/moby/sys/reexec v0.1.0 h1:RrBi8e0EBTLEgfruBOFcxtElzRGTEUkeIFaVXgU7wok=
3638
github.com/moby/sys/reexec v0.1.0/go.mod h1:EqjBg8F3X7iZe5pU6nRZnYCMUTXoxsjiIfHup5wYIN8=
3739
github.com/moby/sys/symlink v0.3.0 h1:GZX89mEZ9u53f97npBy4Rc3vJKj7JBDj/PN2I22GrNU=
@@ -51,6 +53,8 @@ github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCko
5153
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
5254
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
5355
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
56+
github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws=
57+
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
5458
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
5559
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
5660
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=

internal/ldconfig/ldconfig.go

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ import (
2727
"runtime"
2828
"strings"
2929

30+
"github.com/prometheus/procfs"
31+
3032
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
3133
)
3234

@@ -59,6 +61,7 @@ type Ldconfig struct {
5961
inRoot string
6062
isDebianLikeHost bool
6163
isDebianLikeContainer bool
64+
noPivotRoot bool
6265
directories []string
6366
}
6467

@@ -72,6 +75,11 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
7275
if isDebianLike() {
7376
args = append(args, "--is-debian-like-host")
7477
}
78+
79+
if noPivotRoot() {
80+
args = append(args, "--no-pivot")
81+
}
82+
7583
args = append(args, additionalargs...)
7684

7785
return createReexecCommand(args)
@@ -94,6 +102,7 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
94102
// --is-debian-like-host Indicates that the host system is debian-like (e.g. Debian, Ubuntu)
95103
// as opposed to non-Debian-like (e.g. RHEL, Fedora)
96104
// See https://github.com/NVIDIA/nvidia-container-toolkit/pull/1444
105+
// --no-pivot pivot_root should not be used to provide process isolation.
97106
//
98107
// The remaining args are folders where soname symlinks need to be created.
99108
func NewFromArgs(args ...string) (*Ldconfig, error) {
@@ -107,6 +116,7 @@ func NewFromArgs(args ...string) (*Ldconfig, error) {
107116
This allows us to handle the case where there are differences in behavior
108117
between the ldconfig from the host (as executed from an update-ldcache hook) and
109118
ldconfig in the container. Such differences include system search paths.`)
119+
noPivot := fs.Bool("no-pivot", false, "don't use pivot_root to perform isolation")
110120
if err := fs.Parse(args[1:]); err != nil {
111121
return nil, err
112122
}
@@ -122,6 +132,7 @@ ldconfig in the container. Such differences include system search paths.`)
122132
ldconfigPath: *ldconfigPath,
123133
inRoot: *containerRoot,
124134
isDebianLikeHost: *isDebianLikeHost,
135+
noPivotRoot: *noPivot,
125136
directories: fs.Args(),
126137
}
127138
return l, nil
@@ -194,7 +205,7 @@ func (l *Ldconfig) prepareRoot() (string, error) {
194205

195206
// We pivot to the container root for the new process, this further limits
196207
// access to the host.
197-
if err := pivotRoot(root.Name()); err != nil {
208+
if err := l.pivotRoot(root); err != nil {
198209
return "", fmt.Errorf("error running pivot_root: %w", err)
199210
}
200211

@@ -456,3 +467,50 @@ func debianSystemSearchPaths() []string {
456467
paths = append(paths, "/lib", "/usr/lib")
457468
return paths
458469
}
470+
471+
func (l *Ldconfig) pivotRoot(root *os.Root) error {
472+
rootDir := root.Name()
473+
// We select the function to pivot the root based on whether pivot_root is
474+
// supported.
475+
// See https://github.com/opencontainers/runc/blob/c3d127f6e8d9f6c06d78b8329cafa8dd39f6236e/libcontainer/rootfs_linux.go#L207-L216
476+
if l.noPivotRoot {
477+
return msMoveRoot(rootDir)
478+
}
479+
return pivotRoot(rootDir)
480+
}
481+
482+
// noPivotRoot checks whether the current root filesystem supports a pivot_root.
483+
// See https://github.com/opencontainers/runc/blob/main/libcontainer/SPEC.md#filesystem
484+
// for a discussion on when this is not the case.
485+
// If we fail to detect whether pivot-root is supported, we assume that it is supported.
486+
// The logic to check for support is adapted from kata-containers:
487+
//
488+
// https://github.com/kata-containers/kata-containers/blob/e7b9eddcede4bbe2edeb9c3af7b2358dc65da76f/src/agent/src/sandbox.rs#L150
489+
//
490+
// and checks whether "/" is mounted as a rootfs.
491+
func noPivotRoot() bool {
492+
rootFsType, err := getRootfsType("/")
493+
if err != nil {
494+
return false
495+
}
496+
return rootFsType == "rootfs"
497+
}
498+
499+
func getRootfsType(path string) (string, error) {
500+
procSelf, err := procfs.Self()
501+
if err != nil {
502+
return "", err
503+
}
504+
505+
mountStats, err := procSelf.MountStats()
506+
if err != nil {
507+
return "", err
508+
}
509+
510+
for _, mountStat := range mountStats {
511+
if mountStat.Mount == path {
512+
return mountStat.Type, nil
513+
}
514+
}
515+
return "", fmt.Errorf("mount stats for %q not found", path)
516+
}

internal/ldconfig/ldconfig_linux.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,17 @@
2020
package ldconfig
2121

2222
import (
23+
"errors"
2324
"fmt"
2425
"os"
2526
"os/exec"
2627
"path/filepath"
2728
"strconv"
29+
"strings"
2830
"syscall"
2931

3032
"github.com/google/uuid"
33+
"github.com/moby/sys/mountinfo"
3134
"github.com/moby/sys/reexec"
3235

3336
"github.com/opencontainers/runc/libcontainer/utils"
@@ -98,6 +101,86 @@ func pivotRoot(rootfs string) error {
98101
return nil
99102
}
100103

104+
// msMoveRoot is used in cases where pivot root is not supported.
105+
// This includes initramfs filesystems where the root is read-only.
106+
// This is adapted from the implementation here:
107+
//
108+
// https://github.com/opencontainers/runc/blob/e89a29929c775025419ab0d218a43588b4c12b9a/libcontainer/rootfs_linux.go#L1115
109+
//
110+
// With the `mount` and `unmount` calls changed to direct unix.Mount and unix.Unmount calls.
111+
func msMoveRoot(rootfs string) error {
112+
// Before we move the root and chroot we have to mask all "full" sysfs and
113+
// procfs mounts which exist on the host. This is because while the kernel
114+
// has protections against mounting procfs if it has masks, when using
115+
// chroot(2) the *host* procfs mount is still reachable in the mount
116+
// namespace and the kernel permits procfs mounts inside --no-pivot
117+
// containers.
118+
//
119+
// Users shouldn't be using --no-pivot except in exceptional circumstances,
120+
// but to avoid such a trivial security flaw we apply a best-effort
121+
// protection here. The kernel only allows a mount of a pseudo-filesystem
122+
// like procfs or sysfs if there is a *full* mount (the root of the
123+
// filesystem is mounted) without any other locked mount points covering a
124+
// subtree of the mount.
125+
//
126+
// So we try to unmount (or mount tmpfs on top of) any mountpoint which is
127+
// a full mount of either sysfs or procfs (since those are the most
128+
// concerning filesystems to us).
129+
mountinfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) {
130+
// Collect every sysfs and procfs filesystem, except for those which
131+
// are non-full mounts or are inside the rootfs of the container.
132+
if info.Root != "/" ||
133+
(info.FSType != "proc" && info.FSType != "sysfs") ||
134+
strings.HasPrefix(info.Mountpoint, rootfs) {
135+
skip = true
136+
}
137+
return
138+
})
139+
if err != nil {
140+
return err
141+
}
142+
for _, info := range mountinfos {
143+
p := info.Mountpoint
144+
// Be sure umount events are not propagated to the host.
145+
if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
146+
if errors.Is(err, unix.ENOENT) {
147+
// If the mountpoint doesn't exist that means that we've
148+
// already blasted away some parent directory of the mountpoint
149+
// and so we don't care about this error.
150+
continue
151+
}
152+
return err
153+
}
154+
if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
155+
if !errors.Is(err, unix.EINVAL) && !errors.Is(err, unix.EPERM) {
156+
return err
157+
} else {
158+
// If we have not privileges for umounting (e.g. rootless), then
159+
// cover the path.
160+
if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
161+
return err
162+
}
163+
}
164+
}
165+
}
166+
167+
// Move the rootfs on top of "/" in our mount namespace.
168+
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
169+
return err
170+
}
171+
return chroot()
172+
}
173+
174+
func chroot() error {
175+
if err := unix.Chroot("."); err != nil {
176+
return &os.PathError{Op: "chroot", Path: ".", Err: err}
177+
}
178+
if err := unix.Chdir("/"); err != nil {
179+
return &os.PathError{Op: "chdir", Path: "/", Err: err}
180+
}
181+
return nil
182+
}
183+
101184
// mountLdConfig mounts the host ldconfig to the mount namespace of the hook.
102185
// We use WithProcfd to perform the mount operations to ensure that the changes
103186
// are persisted across the pivot root.

internal/ldconfig/ldconfig_other.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ func pivotRoot(newroot string) error {
2929
return fmt.Errorf("not supported")
3030
}
3131

32+
// msMoveRoot is the fallback used by builds that do not include the Linux
// implementation. It performs no work and always returns a "not supported"
// error.
func msMoveRoot(rootfs string) error {
	err := fmt.Errorf("not supported")
	return err
}
35+
3236
func mountLdConfig(hostLdconfigPath string, containerRoot *os.Root) (string, error) {
3337
return "", fmt.Errorf("not supported")
3438
}

0 commit comments

Comments
 (0)