Skip to content

Commit

Permalink
Merge pull request #23032 from giuseppe/drop-make-accessible
Browse files Browse the repository at this point in the history
libpod: do not chmod bind mounts
  • Loading branch information
openshift-merge-bot[bot] committed Jun 22, 2024
2 parents 42a01c0 + 49eb5af commit 25bc426
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 79 deletions.
81 changes: 71 additions & 10 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"slices"
"strconv"
"strings"
"sync"
"syscall"
"time"

metadata "github.com/checkpoint-restore/checkpointctl/lib"
Expand Down Expand Up @@ -544,16 +546,6 @@ func (c *Container) setupStorage(ctx context.Context) error {
c.config.StaticDir = containerInfo.Dir
c.state.RunDir = containerInfo.RunDir

if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
if err := idtools.SafeChown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
return err
}

if err := idtools.SafeChown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
return err
}
}

// Set the default Entrypoint and Command
if containerInfo.Config != nil {
// Set CMD in the container to the default configuration only if ENTRYPOINT is not set by the user.
Expand Down Expand Up @@ -2372,6 +2364,75 @@ func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[s
return allHooks, nil
}

// getRootPathForOCI returns the root path to use for the OCI runtime.
// If the current user is mapped in the container user namespace, then it returns
// the container's mountpoint directly from the storage.
// Otherwise, it returns an intermediate mountpoint that is accessible to anyone.
func (c *Container) getRootPathForOCI() (string, error) {
if hasCurrentUserMapped(c) {
return c.state.Mountpoint, nil
}
return c.getIntermediateMountpointUser()
}

var (
intermediateMountPoint string
intermediateMountPointErr error
intermediateMountPointSync sync.Mutex
)

// getIntermediateMountpointUser returns a path that is accessible to everyone. It must be on TMPDIR since
// the runroot/tmpdir used by libpod are accessible only to the owner.
// To avoid TOCTOU issues, the path must be owned by the current user's UID and GID.
// The path can be used by different containers, so a mount must be created only in a private mount namespace.
func (c *Container) recreateIntermediateMountpointUser() (string, error) {
uid := os.Geteuid()
gid := os.Getegid()
for i := 0; ; i++ {
tmpDir := os.Getenv("TMPDIR")
if tmpDir == "" {
tmpDir = "/tmp"
}
dir := filepath.Join(tmpDir, fmt.Sprintf("intermediate-mountpoint-%d.%d", rootless.GetRootlessUID(), i))
err := os.Mkdir(dir, 0755)
if err != nil {
if !errors.Is(err, os.ErrExist) {
return "", err
}
st, err2 := os.Stat(dir)
if err2 != nil {
return "", err
}
sys := st.Sys().(*syscall.Stat_t)
if !st.IsDir() || sys.Uid != uint32(uid) || sys.Gid != uint32(gid) {
continue
}
}
return dir, nil
}
}

// getIntermediateMountpointUser returns a path that is accessible to everyone.
// To avoid TOCTOU issues, the path must be owned by the current user's UID and GID.
// The path can be used by different containers, so a mount must be created only in a private mount namespace.
func (c *Container) getIntermediateMountpointUser() (string, error) {
intermediateMountPointSync.Lock()
defer intermediateMountPointSync.Unlock()

if intermediateMountPoint == "" || fileutils.Exists(intermediateMountPoint) != nil {
return c.recreateIntermediateMountpointUser()
}

// update the timestamp to prevent systemd-tmpfiles from removing it
now := time.Now()
if err := os.Chtimes(intermediateMountPoint, now, now); err != nil {
if !errors.Is(err, os.ErrNotExist) {
return c.recreateIntermediateMountpointUser()
}
}
return intermediateMountPoint, intermediateMountPointErr
}

// mount mounts the container's root filesystem
func (c *Container) mount() (string, error) {
if c.state.State == define.ContainerStateRemoving {
Expand Down
19 changes: 5 additions & 14 deletions libpod/container_internal_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,11 @@ func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFunc
return nil, nil, err
}

g.SetRootPath(c.state.Mountpoint)
rootPath, err := c.getRootPathForOCI()
if err != nil {
return nil, nil, err
}
g.SetRootPath(rootPath)
g.AddAnnotation("org.opencontainers.image.stopSignal", strconv.FormatUint(uint64(c.config.StopSignal), 10))

if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
Expand Down Expand Up @@ -1834,10 +1838,6 @@ func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error

// Make standard bind mounts to include in the container
func (c *Container) makeBindMounts() error {
if err := idtools.SafeChown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
return fmt.Errorf("cannot chown run directory: %w", err)
}

if c.state.BindMounts == nil {
c.state.BindMounts = make(map[string]string)
}
Expand Down Expand Up @@ -1917,15 +1917,6 @@ func (c *Container) makeBindMounts() error {
return fmt.Errorf("assigning mounts to container %s: %w", c.ID(), err)
}
}

if !hasCurrentUserMapped(c) {
if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
return err
}
if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
return err
}
}
} else {
if !c.config.UseImageResolvConf {
if err := c.createResolvConf(); err != nil {
Expand Down
38 changes: 2 additions & 36 deletions libpod/oci_conmon_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,23 +183,11 @@ func hasCurrentUserMapped(ctr *Container) bool {

// CreateContainer creates a container.
func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
// always make the run dir accessible to the current user so that the PID files can be read without
// being in the rootless user namespace.
if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
return 0, err
}
if !hasCurrentUserMapped(ctr) {
for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
return 0, err
}
}

// if we are running a non privileged container, be sure to umount some kernel paths so they are not
// bind mounted inside the container at all.
if !ctr.config.Privileged && !rootless.IsRootless() {
return r.createRootlessContainer(ctr, restoreOptions)
}
hideFiles := !ctr.config.Privileged && !rootless.IsRootless()
return r.createRootlessContainer(ctr, restoreOptions, hideFiles)
}
return r.createOCIContainer(ctr, restoreOptions)
}
Expand Down Expand Up @@ -979,28 +967,6 @@ func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntime
return &conmon, &ocirt, nil
}

// makeAccessible changes the path permission and each parent directory to have --x--x--x
func makeAccessible(path string, uid, gid int) error {
for ; path != "/"; path = filepath.Dir(path) {
st, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
continue
}
if st.Mode()&0111 != 0111 {
if err := os.Chmod(path, st.Mode()|0111); err != nil {
return err
}
}
}
return nil
}

// Wait for a container which has been sent a signal to stop
func waitContainerStop(ctr *Container, timeout time.Duration) error {
return waitPidStop(ctr.state.PID, timeout)
Expand Down
2 changes: 1 addition & 1 deletion libpod/oci_conmon_freebsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"os/exec"
)

func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions, hideFiles bool) (int64, error) {
return -1, errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer")
}

Expand Down
91 changes: 73 additions & 18 deletions libpod/oci_conmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
package libpod

import (
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
Expand All @@ -25,7 +27,7 @@ import (
"golang.org/x/sys/unix"
)

func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions, hideFiles bool) (int64, error) {
type result struct {
restoreDuration int64
err error
Expand All @@ -40,35 +42,88 @@ func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOption
}
defer errorhandling.CloseQuiet(fd)

rootPath, err := ctr.getRootPathForOCI()
if err != nil {
return 0, err
}

// create a new mountns on the current thread
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
return 0, err
}
defer func() {
if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
logrus.Errorf("Unable to clone new namespace: %q", err)
err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS)
if err == nil {
// If we are able to reset the previous mount namespace, unlock the thread and reuse it
runtime.UnlockOSThread()
} else {
// otherwise, leave the thread locked and the Go runtime will terminate it
logrus.Errorf("Unable to reset the previous mount namespace: %q", err)
}
}()

// don't spread our mounts around. We are setting only /sys to be slave
// so that the cleanup process is still able to umount the storage and the
// changes are propagated to the host.
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
if err != nil {
return 0, fmt.Errorf("cannot make /sys slave: %w", err)
}

mounts, err := pmount.GetMounts()
if err != nil {
return 0, err
}
for _, m := range mounts {
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
continue
if rootPath != "" {
byMountpoint := make(map[string]*pmount.Info)
for _, m := range mounts {
byMountpoint[m.Mountpoint] = m
}
isShared := false
var parentMount string
for dir := filepath.Dir(rootPath); ; dir = filepath.Dir(dir) {
if m, found := byMountpoint[dir]; found {
parentMount = dir
for _, o := range strings.Split(m.Optional, ",") {
opt := strings.Split(o, ":")
if opt[0] == "shared" {
isShared = true
break
}
}
break
}
if dir == "/" {
return 0, fmt.Errorf("cannot find mountpoint for the root path")
}
}

// do not propagate the bind mount on the parent mount namespace
if err := unix.Mount("", parentMount, "", unix.MS_SLAVE, ""); err != nil {
return 0, fmt.Errorf("failed to make %s slave: %w", parentMount, err)
}

// bind mount the containers' mount path to the path where the OCI runtime expects it to be
if err := unix.Mount(ctr.state.Mountpoint, rootPath, "", unix.MS_BIND, ""); err != nil {
return 0, fmt.Errorf("failed to bind mount %s to %s: %w", ctr.state.Mountpoint, rootPath, err)
}

if isShared {
// we need to restore the shared propagation of the parent mount so that we don't break -v $SRC:$DST:shared in the container
// if $SRC is on the same mount as the root path
if err := unix.Mount("", parentMount, "", unix.MS_SHARED, ""); err != nil {
return 0, fmt.Errorf("failed to restore MS_SHARED propagation for %s: %w", parentMount, err)
}
}
}

if hideFiles {
// don't spread our mounts around. We are setting only /sys to be slave
// so that the cleanup process is still able to umount the storage and the
// changes are propagated to the host.
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
if err != nil {
return 0, fmt.Errorf("cannot make /sys slave: %w", err)
}
err = unix.Unmount(m.Mountpoint, 0)
if err != nil && !os.IsNotExist(err) {
return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
for _, m := range mounts {
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
continue
}
err = unix.Unmount(m.Mountpoint, 0)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
}
}
}
return r.createOCIContainer(ctr, restoreOptions)
Expand Down

0 comments on commit 25bc426

Please sign in to comment.