From 429e06a518ae749ab6ba7671745ea835eb64cb9b Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 5 Sep 2024 05:05:24 +0900 Subject: [PATCH] libct: Signal: honor RootlessCgroups `signalAllProcesses()` depends on the cgroup and is expected to fail when runc is running in rootless without an access to the cgroup. When `RootlessCgroups` is set to `true`, runc just ignores the error from `signalAllProcesses` and may leak some processes running. (See the comments in PR 4395) In the future, runc should walk the process tree to avoid such a leak. Note that `RootlessCgroups` is a misnomer; it is set to `false` despite the name when cgroup v2 delegation is configured. This is expected to be renamed in a separate commit. Fix issue 4394 Signed-off-by: Akihiro Suda --- libcontainer/container_linux.go | 10 ++++++++++ libcontainer/init_linux.go | 2 ++ libcontainer/state_linux.go | 1 + tests/integration/kill.bats | 24 ++++++++++++++++++++++++ 4 files changed, 37 insertions(+) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 63db2199d54..6a91df01db0 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -388,11 +388,21 @@ func (c *Container) Signal(s os.Signal) error { // leftover processes. Handle this special case here. if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) { if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { + if c.config.RootlessCgroups { // may not have an access to cgroup + logrus.WithError(err).Warn("failed to kill all processes, possibly due to lack of cgroup (Hint: enable cgroup v2 delegation)") + // Some processes may leak when cgroup is not delegated + // https://github.com/opencontainers/runc/pull/4395#pullrequestreview-2291179652 + return c.signal(s) + } return fmt.Errorf("unable to kill all processes: %w", err) } return nil } + return c.signal(s) +} + +func (c *Container) signal(s os.Signal) error { // To avoid a PID reuse attack, don't kill non-running container. if !c.hasInit() { return ErrNotRunning diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index f3d0d5706f2..90f40cad257 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -695,6 +695,8 @@ func setupPersonality(config *configs.Config) error { // signalAllProcesses freezes then iterates over all the processes inside the // manager's cgroups sending the signal s to them. +// +// signalAllProcesses returns ErrNotRunning when the cgroup does not exist. func signalAllProcesses(m cgroups.Manager, s unix.Signal) error { if !m.Exists() { return ErrNotRunning diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go index 0b1b8716a76..ad96f0801ea 100644 --- a/libcontainer/state_linux.go +++ b/libcontainer/state_linux.go @@ -44,6 +44,7 @@ func destroy(c *Container) error { // and destroy is supposed to remove all the container resources, we need // to kill those processes here. if !c.config.Namespaces.IsPrivate(configs.NEWPID) { + // Likely to fail when c.config.RootlessCgroups is true _ = signalAllProcesses(c.cgroupManager, unix.SIGKILL) } if err := c.cgroupManager.Destroy(); err != nil { diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats index 4d3208be0bc..355407eccdb 100644 --- a/tests/integration/kill.bats +++ b/tests/integration/kill.bats @@ -133,3 +133,27 @@ test_host_pidns_kill() { test_host_pidns_kill unset KILL_INIT } + +# https://github.com/opencontainers/runc/issues/4394 (cgroup v1, rootless) +@test "kill KILL [shared pidns]" { + update_config '.process.args = ["sleep", "infinity"]' + + runc run -d --console-socket "$CONSOLE_SOCKET" target_ctr + [ "$status" -eq 0 ] + testcontainer target_ctr running + target_pid="$(__runc state target_ctr | jq .pid)" + update_config '.linux.namespaces |= map(if .type == "user" or .type == "pid" then (.path = "/proc/'"$target_pid"'/ns/" + .type) else . end) | del(.linux.uidMappings) | del(.linux.gidMappings)' + + runc run -d --console-socket "$CONSOLE_SOCKET" attached_ctr + [ "$status" -eq 0 ] + testcontainer attached_ctr running + + runc kill attached_ctr 9 + [ "$status" -eq 0 ] + + runc delete --force attached_ctr + [ "$status" -eq 0 ] + + runc delete --force target_ctr + [ "$status" -eq 0 ] +}