Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using fexecve to protect the entrypoint of the container #62

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/opencontainers/runc

go 1.20
go 1.21

require (
github.com/checkpoint-restore/go-criu/v6 v6.3.0
Expand All @@ -13,6 +13,7 @@ require (
github.com/moby/sys/mountinfo v0.7.1
github.com/moby/sys/user v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers-sec/go-containersec v0.0.2
github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4
github.com/opencontainers/selinux v1.11.0
github.com/seccomp/libseccomp-golang v0.10.0
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,35 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g=
github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/sys/user v0.1.0 h1:WmZ93f5Ux6het5iituh9x2zAG7NFY9Aqi49jjE1PaQg=
github.com/moby/sys/user v0.1.0/go.mod h1:fKJhFOnsCN6xZ5gSfbM6zaHGgDJMrqt9/reuj4T7MmU=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers-sec/go-containersec v0.0.2 h1:E37DR3CH9VWRJhr4+0VZbjdMQTR3371ijJiUGpQVOOM=
github.com/opencontainers-sec/go-containersec v0.0.2/go.mod h1:8tU3XOqpsj1/WwjTsJa78OkCuJpQ9VrjXkgITmqzeUw=
github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4 h1:EctkgBjZ1y4q+sibyuuIgiKpa0QSd2elFtSSdNvBVow=
github.com/opencontainers/runtime-spec v1.1.1-0.20230823135140-4fec88fd00a4/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/seccomp/libseccomp-golang v0.10.0 h1:aA4bp+/Zzi0BnWZ2F1wgNBs5gTpm+na2rWM6M9YjLpY=
Expand Down
126 changes: 1 addition & 125 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ import (

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/dmz"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/system/kernelversion"
"github.com/opencontainers/runc/libcontainer/utils"
)

Expand Down Expand Up @@ -443,117 +441,13 @@ func (c *Container) includeExecFifo(cmd *exec.Cmd) error {
return nil
}

// No longer needed in Go 1.21.
func slicesContains[S ~[]E, E comparable](slice S, needle E) bool {
for _, val := range slice {
if val == needle {
return true
}
}
return false
}

func isDmzBinarySafe(c *configs.Config) bool {
if !dmz.WorksWithSELinux(c) {
return false
}

// Because we set the dumpable flag in nsexec, the only time when it is
// unsafe to use runc-dmz is when the container process would be able to
// race against "runc init" and bypass the ptrace_may_access() checks.
//
// This is only the case if the container processes could have
// CAP_SYS_PTRACE somehow (i.e. the capability is present in the bounding,
// inheritable, or ambient sets). Luckily, most containers do not have this
// capability.
if c.Capabilities == nil ||
(!slicesContains(c.Capabilities.Bounding, "CAP_SYS_PTRACE") &&
!slicesContains(c.Capabilities.Inheritable, "CAP_SYS_PTRACE") &&
!slicesContains(c.Capabilities.Ambient, "CAP_SYS_PTRACE")) {
return true
}

// Since Linux 4.10 (see bfedb589252c0) user namespaced containers cannot
// access /proc/$pid/exe of runc after it joins the namespace (until it
// does an exec), regardless of the capability set. This has been
// backported to other distribution kernels, but there's no way of checking
// this cheaply -- better to be safe than sorry here.
linux410 := kernelversion.KernelVersion{Kernel: 4, Major: 10}
if ok, err := kernelversion.GreaterEqualThan(linux410); ok && err == nil {
if c.Namespaces.Contains(configs.NEWUSER) {
return true
}
}

// Assume it's unsafe otherwise.
return false
}

func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
comm, err := newProcessComm()
if err != nil {
return nil, err
}

// Make sure we use a new safe copy of /proc/self/exe or the runc-dmz
// binary each time this is called, to make sure that if a container
// manages to overwrite the file it cannot affect other containers on the
// system. For runc, this code will only ever be called once, but
// libcontainer users might call this more than once.
p.closeClonedExes()
var (
exePath string
// only one of dmzExe or safeExe are used at a time
dmzExe, safeExe *os.File
)
if dmz.IsSelfExeCloned() {
// /proc/self/exe is already a cloned binary -- no need to do anything
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
// We don't need to use /proc/thread-self here because the exe mm of a
// thread-group is guaranteed to be the same for all threads by
// definition. This lets us avoid having to do runtime.LockOSThread.
exePath = "/proc/self/exe"
} else {
var err error
if isDmzBinarySafe(c.config) {
dmzExe, err = dmz.Binary(c.stateDir)
if err == nil {
// We can use our own executable without cloning if we are
// using runc-dmz. We don't need to use /proc/thread-self here
// because the exe mm of a thread-group is guaranteed to be the
// same for all threads by definition. This lets us avoid
// having to do runtime.LockOSThread.
exePath = "/proc/self/exe"
p.clonedExes = append(p.clonedExes, dmzExe)
logrus.Debug("runc-dmz: using runc-dmz") // used for tests
} else if errors.Is(err, dmz.ErrNoDmzBinary) {
logrus.Debug("runc-dmz binary not embedded in runc binary, falling back to /proc/self/exe clone")
} else if err != nil {
return nil, fmt.Errorf("failed to create runc-dmz binary clone: %w", err)
}
} else {
// If the configuration makes it unsafe to use runc-dmz, pretend we
// don't have it embedded so we do /proc/self/exe cloning.
logrus.Debug("container configuration unsafe for runc-dmz, falling back to /proc/self/exe clone")
err = dmz.ErrNoDmzBinary
}
if errors.Is(err, dmz.ErrNoDmzBinary) {
safeExe, err = dmz.CloneSelfExe(c.stateDir)
if err != nil {
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
}
exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
p.clonedExes = append(p.clonedExes, safeExe)
logrus.Debug("runc-dmz: using /proc/self/exe clone") // used for tests
}
// Just to make sure we don't run without protection.
if dmzExe == nil && safeExe == nil {
// This should never happen.
return nil, fmt.Errorf("[internal error] attempted to spawn a container with no /proc/self/exe protection")
}
}

cmd := exec.Command(exePath, "init")
cmd := exec.Command("/proc/self/exe", "init")
cmd.Args[0] = os.Args[0]
cmd.Stdin = p.Stdin
cmd.Stdout = p.Stdout
Expand All @@ -580,12 +474,6 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)

if dmzExe != nil {
cmd.ExtraFiles = append(cmd.ExtraFiles, dmzExe)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_DMZEXEFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
}

cmd.ExtraFiles = append(cmd.ExtraFiles, comm.logPipeChild)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
Expand All @@ -600,18 +488,6 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
)
}

if safeExe != nil {
// Due to a Go stdlib bug, we need to add safeExe to the set of
// ExtraFiles otherwise it is possible for the stdlib to clobber the fd
// during forkAndExecInChild1 and replace it with some other file that
// might be malicious. This is less than ideal (because the descriptor
// will be non-O_CLOEXEC) however we have protections in "runc init" to
// stop us from leaking extra file descriptors.
//
// See <https://github.com/golang/go/issues/61751>.
cmd.ExtraFiles = append(cmd.ExtraFiles, safeExe)
}

// NOTE: when running a container with no PID namespace and the parent
// process spawning the container is PID1 the pdeathsig is being
// delivered to the container's init process by the kernel for some
Expand Down
18 changes: 14 additions & 4 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/opencontainers-sec/go-containersec/execve"
"github.com/opencontainers-sec/go-containersec/path"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
Expand Down Expand Up @@ -135,9 +137,17 @@ func (l *linuxSetnsInit) Init() error {
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
}

if l.dmzExe != nil {
l.config.Args[0] = name
return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ())
fd, cmd, args, env, err := execve.GetSecExecve(name, l.config.Args, os.Environ())
if err != nil {
return err
}
jail, err := path.IsPathInJail(cmd)
if err != nil {
return err
}
if !jail {
_ = unix.Close(fd)
return fmt.Errorf("can't find %s in the current file system", cmd)
}
return system.Exec(name, l.config.Args, os.Environ())
return system.Fexecve(uintptr(fd), args, env)
}
18 changes: 14 additions & 4 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/opencontainers-sec/go-containersec/execve"
"github.com/opencontainers-sec/go-containersec/path"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/keys"
Expand Down Expand Up @@ -278,9 +280,17 @@ func (l *linuxStandardInit) Init() error {
return err
}

if l.dmzExe != nil {
l.config.Args[0] = name
return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ())
fd, cmd, args, env, err := execve.GetSecExecve(name, l.config.Args, os.Environ())
if err != nil {
return err
}
jail, err := path.IsPathInJail(cmd)
if err != nil {
return err
}
if !jail {
_ = unix.Close(fd)
return fmt.Errorf("can't find %s in the current file system", cmd)
}
return system.Exec(name, l.config.Args, os.Environ())
return system.Fexecve(uintptr(fd), args, env)
}
34 changes: 0 additions & 34 deletions tests/integration/run.bats
Original file line number Diff line number Diff line change
Expand Up @@ -127,40 +127,6 @@ function teardown() {
[ "${lines[0]}" = "410" ]
}

@test "runc run [runc-dmz]" {
runc --debug run test_hello
[ "$status" -eq 0 ]
[[ "$output" = *"Hello World"* ]]
# We use runc-dmz if we can.
[[ "$output" = *"runc-dmz: using runc-dmz"* ]]
}

@test "runc run [cap_sys_ptrace -> /proc/self/exe clone]" {
# Add CAP_SYS_PTRACE to the bounding set, the minimum needed to indicate a
# container process _could_ get CAP_SYS_PTRACE.
update_config '.process.capabilities.bounding += ["CAP_SYS_PTRACE"]'

runc --debug run test_hello
[ "$status" -eq 0 ]
[[ "$output" = *"Hello World"* ]]
if [ "$EUID" -ne 0 ] && is_kernel_gte 4.10; then
# For Linux 4.10 and later, rootless containers will use runc-dmz
# because they are running in a user namespace. See isDmzBinarySafe().
[[ "$output" = *"runc-dmz: using runc-dmz"* ]]
else
# If the container has CAP_SYS_PTRACE and is not rootless, we use
# /proc/self/exe cloning.
[[ "$output" = *"runc-dmz: using /proc/self/exe clone"* ]]
fi
}

@test "RUNC_DMZ=legacy runc run [/proc/self/exe clone]" {
RUNC_DMZ=legacy runc --debug run test_hello
[ "$status" -eq 0 ]
[[ "$output" = *"Hello World"* ]]
[[ "$output" = *"runc-dmz: using /proc/self/exe clone"* ]]
}

@test "runc run [joining existing container namespaces]" {
requires timens

Expand Down
Loading
Loading