Skip to content

Commit

Permalink
Merge tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/brauner/linux

Pull thread updates from Christian Brauner:
 "We have been discussing using pidfds to attach to namespaces for quite
  a while and the patches have in one form or another already existed
  for about a year. But I wanted to wait to see how the general api
  would be received and adopted.

  This contains the changes to make it possible to use pidfds to attach
  to the namespaces of a process, i.e. they can be passed as the first
  argument to the setns() syscall.

  When only a single namespace type is specified the semantics are
  equivalent to passing an nsfd. That means setns(nsfd, CLONE_NEWNET)
  equals setns(pidfd, CLONE_NEWNET).

  However, when a pidfd is passed, multiple namespace flags can be
  specified in the second setns() argument and setns() will attach the
  caller to all the specified namespaces all at once or to none of them.

  Specifying 0 as the flags argument is not valid together with a pidfd.
  Here are just two obvious examples of valid calls:

    setns(pidfd, CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET);
    setns(pidfd, CLONE_NEWUSER);

  Allowing callers to also attach to subsets of namespaces supports
  various use-cases where a caller first calls setns() on a subset of
  namespaces to retain privilege, performs an action, and then attaches
  to another subset of namespaces.

  Apart from significantly reducing the number of syscalls needed to
  attach to all currently supported namespaces (eight "open+setns"
  sequences vs just a single "setns()"), this also allows atomic setns
  to a set of namespaces, i.e. either attaching to all namespaces
  succeeds or we fail without having changed anything.

  This is centered around a new internal struct nsset which holds all
  information necessary for a task to switch to a new set of namespaces
  atomically. Fwiw, with this change a pidfd becomes the only token
  needed to interact with a container. I'm expecting this to be
  picked-up by util-linux for nsenter rather soon.

  Associated with this change is a shiny new test-suite dedicated to
  setns() (for pidfds and nsfds alike)"

* tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  selftests/pidfd: add pidfd setns tests
  nsproxy: attach to namespaces via pidfds
  nsproxy: add struct nsset
  • Loading branch information
torvalds committed Jun 3, 2020
2 parents d479c5a + 2b40c5d commit e7c93cb
Show file tree
Hide file tree
Showing 18 changed files with 833 additions and 47 deletions.
15 changes: 11 additions & 4 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -1786,6 +1786,11 @@ static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
return container_of(ns, struct mnt_namespace, ns);
}

/*
 * Hand out the generic ns_common embedded in @mnt. This is the
 * counterpart of to_mnt_ns(), which maps an ns_common back to its
 * containing mount namespace.
 */
struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
{
	struct ns_common *common = &mnt->ns;

	return common;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
Expand Down Expand Up @@ -4013,16 +4018,18 @@ static void mntns_put(struct ns_common *ns)
put_mnt_ns(to_mnt_ns(ns));
}

static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
static int mntns_install(struct nsset *nsset, struct ns_common *ns)
{
struct fs_struct *fs = current->fs;
struct nsproxy *nsproxy = nsset->nsproxy;
struct fs_struct *fs = nsset->fs;
struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
struct user_namespace *user_ns = nsset->cred->user_ns;
struct path root;
int err;

if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
!ns_capable(user_ns, CAP_SYS_CHROOT) ||
!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;

if (is_anon_ns(mnt_ns))
Expand Down
5 changes: 5 additions & 0 deletions fs/nsfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
return res;
}

bool proc_ns_file(const struct file *file)
{
return file->f_op == &ns_file_operations;
}

struct file *proc_ns_fget(int fd)
{
struct file *file;
Expand Down
2 changes: 2 additions & 0 deletions include/linux/mnt_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
struct mnt_namespace;
struct fs_struct;
struct user_namespace;
struct ns_common;

extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
extern struct ns_common *from_mnt_ns(struct mnt_namespace *);

extern const struct file_operations proc_mounts_operations;
extern const struct file_operations proc_mountinfo_operations;
Expand Down
24 changes: 24 additions & 0 deletions include/linux/nsproxy.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,30 @@ struct nsproxy {
};
extern struct nsproxy init_nsproxy;

/*
 * A structure to encompass all bits needed to install
 * a partial or complete new set of namespaces.
 *
 * If a new user namespace is requested cred will
 * point to a modifiable set of credentials. If a pointer
 * to a modifiable set is needed nsset_cred() must be
 * used and tested.
 *
 * Members:
 *   flags   - namespace flags; nsset_cred() tests CLONE_NEWUSER here to
 *             decide whether cred may be handed out as modifiable.
 *   nsproxy - the nsproxy the per-namespace ->install() handlers update.
 *   fs      - the fs_struct the mount namespace installer operates on.
 *   cred    - credentials whose user_ns the installers use for their
 *             capability checks.
 */
struct nsset {
unsigned flags;
struct nsproxy *nsproxy;
struct fs_struct *fs;
const struct cred *cred;
};

/*
 * Return the credentials of @set as a modifiable pointer, or NULL when
 * CLONE_NEWUSER is not set in @set->flags. Callers must test the result
 * before writing through it.
 */
static inline struct cred *nsset_cred(struct nsset *set)
{
	/* Without a requested user namespace the creds must stay read-only. */
	if (!(set->flags & CLONE_NEWUSER))
		return NULL;

	/* Deliberately drops const: with CLONE_NEWUSER cred is modifiable. */
	return (struct cred *)set->cred;
}

/*
* the namespaces access rules are:
*
Expand Down
2 changes: 2 additions & 0 deletions include/linux/proc_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,4 +179,6 @@ static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
return inode->i_sb->s_fs_info;
}

bool proc_ns_file(const struct file *file);

#endif /* _LINUX_PROC_FS_H */
4 changes: 2 additions & 2 deletions include/linux/proc_ns.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <linux/ns_common.h>

struct pid_namespace;
struct nsproxy;
struct nsset;
struct path;
struct task_struct;
struct inode;
Expand All @@ -19,7 +19,7 @@ struct proc_ns_operations {
int type;
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
int (*install)(struct nsset *nsset, struct ns_common *ns);
struct user_namespace *(*owner)(struct ns_common *ns);
struct ns_common *(*get_parent)(struct ns_common *ns);
} __randomize_layout;
Expand Down
7 changes: 3 additions & 4 deletions ipc/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,14 @@ static void ipcns_put(struct ns_common *ns)
return put_ipc_ns(to_ipc_ns(ns));
}

static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
static int ipcns_install(struct nsset *nsset, struct ns_common *new)
{
struct nsproxy *nsproxy = nsset->nsproxy;
struct ipc_namespace *ns = to_ipc_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;

/* Ditch state from the old ipc namespace */
exit_sem(current);
put_ipc_ns(nsproxy->ipc_ns);
nsproxy->ipc_ns = get_ipc_ns(ns);
return 0;
Expand Down
5 changes: 3 additions & 2 deletions kernel/cgroup/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,12 @@ static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
return container_of(ns, struct cgroup_namespace, ns);
}

static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
{
struct nsproxy *nsproxy = nsset->nsproxy;
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);

if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;

Expand Down
Loading

0 comments on commit e7c93cb

Please sign in to comment.