From e7848482e227e8da4f958510734de370f6ee6479 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Wed, 3 Jul 2024 17:25:10 +0900 Subject: [PATCH 1/2] Revert "libcontainer: seccomp: pass around *os.File for notifyfd" This reverts commit 20b95f23ca33cb1751ee0b5318e1d5f3676032c6. > Conflicts: > libcontainer/init_linux.go Signed-off-by: Akihiro Suda --- libcontainer/init_linux.go | 8 ++--- libcontainer/seccomp/patchbpf/enosys_linux.go | 13 +++---- libcontainer/seccomp/seccomp_linux.go | 35 ++++++++++--------- libcontainer/seccomp/seccomp_unsupported.go | 7 ++-- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index e0a65d5cc8f..f3d0d5706f2 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -451,11 +451,11 @@ func syncParentHooks(pipe *syncSocket) error { // syncParentSeccomp sends the fd associated with the seccomp file descriptor // to the parent, and wait for the parent to do pidfd_getfd() to grab a copy. -func syncParentSeccomp(pipe *syncSocket, seccompFd *os.File) error { - if seccompFd == nil { +func syncParentSeccomp(pipe *syncSocket, seccompFd int) error { + if seccompFd == -1 { return nil } - defer seccompFd.Close() + defer unix.Close(seccompFd) // Tell parent to grab our fd. // @@ -466,7 +466,7 @@ func syncParentSeccomp(pipe *syncSocket, seccompFd *os.File) error { // before the parent gets the file descriptor would deadlock "runc init" if // we allowed it for SCMP_ACT_NOTIFY). See seccomp.InitSeccomp() for more // details. - if err := writeSyncArg(pipe, procSeccomp, seccompFd.Fd()); err != nil { + if err := writeSyncArg(pipe, procSeccomp, seccompFd); err != nil { return err } // Wait for parent to tell us they've grabbed the seccompfd. diff --git a/libcontainer/seccomp/patchbpf/enosys_linux.go b/libcontainer/seccomp/patchbpf/enosys_linux.go index f0bf186ee0d..f829bf7ae2f 100644 --- a/libcontainer/seccomp/patchbpf/enosys_linux.go +++ b/libcontainer/seccomp/patchbpf/enosys_linux.go @@ -704,17 +704,17 @@ func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (fd int, err erro // patches said filter to handle -ENOSYS in a much nicer manner than the // default libseccomp default action behaviour, and loads the patched filter // into the kernel for the current process. -func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (*os.File, error) { +func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (int, error) { // Generate a patched filter. fprog, err := enosysPatchFilter(config, filter) if err != nil { - return nil, fmt.Errorf("error patching filter: %w", err) + return -1, fmt.Errorf("error patching filter: %w", err) } // Get the set of libseccomp flags set. seccompFlags, noNewPrivs, err := filterFlags(config, filter) if err != nil { - return nil, fmt.Errorf("unable to fetch seccomp filter flags: %w", err) + return -1, fmt.Errorf("unable to fetch seccomp filter flags: %w", err) } // Set no_new_privs if it was requested, though in runc we handle @@ -722,14 +722,15 @@ func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (*os.F if noNewPrivs { logrus.Warnf("potentially misconfigured filter -- setting no_new_privs in seccomp path") if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { - return nil, fmt.Errorf("error enabling no_new_privs bit: %w", err) + return -1, fmt.Errorf("error enabling no_new_privs bit: %w", err) } } // Finally, load the filter. fd, err := sysSeccompSetFilter(seccompFlags, fprog) if err != nil { - return nil, fmt.Errorf("error loading seccomp filter: %w", err) + return -1, fmt.Errorf("error loading seccomp filter: %w", err) } - return os.NewFile(uintptr(fd), "[seccomp filter]"), nil + + return fd, nil } diff --git a/libcontainer/seccomp/seccomp_linux.go b/libcontainer/seccomp/seccomp_linux.go index 2f646b6d822..e399972aa53 100644 --- a/libcontainer/seccomp/seccomp_linux.go +++ b/libcontainer/seccomp/seccomp_linux.go @@ -5,7 +5,6 @@ package seccomp import ( "errors" "fmt" - "os" libseccomp "github.com/seccomp/libseccomp-golang" "github.com/sirupsen/logrus" @@ -27,16 +26,17 @@ const ( ) // InitSeccomp installs the seccomp filters to be used in the container as -// specified in config. Returns the seccomp file descriptor if any of the -// filters include a SCMP_ACT_NOTIFY action. -func InitSeccomp(config *configs.Seccomp) (*os.File, error) { +// specified in config. +// Returns the seccomp file descriptor if any of the filters include a +// SCMP_ACT_NOTIFY action, otherwise returns -1. +func InitSeccomp(config *configs.Seccomp) (int, error) { if config == nil { - return nil, errors.New("cannot initialize Seccomp - nil config passed") + return -1, errors.New("cannot initialize Seccomp - nil config passed") } defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet) if err != nil { - return nil, errors.New("error initializing seccomp - invalid default action") + return -1, errors.New("error initializing seccomp - invalid default action") } // Ignore the error since pre-2.4 libseccomp is treated as API level 0. @@ -44,7 +44,7 @@ func InitSeccomp(config *configs.Seccomp) (*os.File, error) { for _, call := range config.Syscalls { if call.Action == configs.Notify { if apiLevel < 6 { - return nil, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel) + return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel) } // We can't allow the write syscall to notify to the seccomp agent. @@ -60,36 +60,36 @@ func InitSeccomp(config *configs.Seccomp) (*os.File, error) { // agent allows those syscalls to proceed, initialization works just fine and the agent can // handle future read()/close() syscalls as it wanted. if call.Name == "write" { - return nil, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall") + return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall") } } } // See comment on why write is not allowed. The same reason applies, as this can mean handling write too. if defaultAction == libseccomp.ActNotify { - return nil, errors.New("SCMP_ACT_NOTIFY cannot be used as default action") + return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action") } filter, err := libseccomp.NewFilter(defaultAction) if err != nil { - return nil, fmt.Errorf("error creating filter: %w", err) + return -1, fmt.Errorf("error creating filter: %w", err) } // Add extra architectures for _, arch := range config.Architectures { scmpArch, err := libseccomp.GetArchFromString(arch) if err != nil { - return nil, fmt.Errorf("error validating Seccomp architecture: %w", err) + return -1, fmt.Errorf("error validating Seccomp architecture: %w", err) } if err := filter.AddArch(scmpArch); err != nil { - return nil, fmt.Errorf("error adding architecture to seccomp filter: %w", err) + return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err) } } // Add extra flags. for _, flag := range config.Flags { if err := setFlag(filter, flag); err != nil { - return nil, err + return -1, err } } @@ -109,24 +109,25 @@ func InitSeccomp(config *configs.Seccomp) (*os.File, error) { // Unset no new privs bit if err := filter.SetNoNewPrivsBit(false); err != nil { - return nil, fmt.Errorf("error setting no new privileges: %w", err) + return -1, fmt.Errorf("error setting no new privileges: %w", err) } // Add a rule for each syscall for _, call := range config.Syscalls { if call == nil { - return nil, errors.New("encountered nil syscall while initializing Seccomp") + return -1, errors.New("encountered nil syscall while initializing Seccomp") } if err := matchCall(filter, call, defaultAction); err != nil { - return nil, err + return -1, err } } seccompFd, err := patchbpf.PatchAndLoad(config, filter) if err != nil { - return nil, fmt.Errorf("error loading seccomp filter into kernel: %w", err) + return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err) } + return seccompFd, nil } diff --git a/libcontainer/seccomp/seccomp_unsupported.go b/libcontainer/seccomp/seccomp_unsupported.go index d2222ce71b7..25713f23271 100644 --- a/libcontainer/seccomp/seccomp_unsupported.go +++ b/libcontainer/seccomp/seccomp_unsupported.go @@ -4,7 +4,6 @@ package seccomp import ( "errors" - "os" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runtime-spec/specs-go" @@ -13,11 +12,11 @@ import ( var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") // InitSeccomp does nothing because seccomp is not supported. -func InitSeccomp(config *configs.Seccomp) (*os.File, error) { +func InitSeccomp(config *configs.Seccomp) (int, error) { if config != nil { - return nil, ErrSeccompNotEnabled + return -1, ErrSeccompNotEnabled } - return nil, nil + return -1, nil } // FlagSupported tells if a provided seccomp flag is supported. From a5e660ca5b3045545c2a442f0cae2b3f1862958e Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Wed, 3 Jul 2024 17:25:51 +0900 Subject: [PATCH 2/2] seccomp-notify.bats: add fcntl to the important syscall list For issue 4328 Signed-off-by: Akihiro Suda --- tests/integration/seccomp-notify.bats | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/seccomp-notify.bats b/tests/integration/seccomp-notify.bats index d979cf09ce8..58492090236 100644 --- a/tests/integration/seccomp-notify.bats +++ b/tests/integration/seccomp-notify.bats @@ -83,8 +83,9 @@ function scmp_act_notify_template() { } # Test important syscalls (some might be executed by runc) work fine when handled by the agent. noNewPrivileges FALSE. +# fcntl: https://github.com/opencontainers/runc/issues/4328 @test "runc run [seccomp] (SCMP_ACT_NOTIFY important syscalls noNewPrivileges false)" { - scmp_act_notify_template "/bin/true" false '"execve","openat","open","read","close"' + scmp_act_notify_template "/bin/true" false '"execve","openat","open","read","close","fcntl"' runc run test_busybox [ "$status" -eq 0 ] @@ -92,7 +93,7 @@ function scmp_act_notify_template() { # Test important syscalls (some might be executed by runc) work fine when handled by the agent. noNewPrivileges TRUE. @test "runc run [seccomp] (SCMP_ACT_NOTIFY important syscalls noNewPrivileges true)" { - scmp_act_notify_template "/bin/true" true '"execve","openat","open","read","close"' + scmp_act_notify_template "/bin/true" true '"execve","openat","open","read","close","fcntl"' runc run test_busybox [ "$status" -eq 0 ]