Skip to content

Commit

Permalink
Merge tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.o…
Browse files Browse the repository at this point in the history
…rg/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull ipc sysctl namespace updates from Eric Biederman:
 "This updates the ipc sysctls so that they are fundamentally per ipc
  namespace. Previously these sysctls depended upon a hack to simulate
  being per ipc namespace by looking up the ipc namespace in read or
  write. With this set of changes the ipc sysctls are registered per ipc
  namespace and open looks up the ipc namespace.

  Not only does this series of changes ensure the traditional binding at
  open time happens, but it sets a foundation for being able to relax
  the permission checks to allow a user namespace root to change the ipc
  sysctls for an ipc namespace that the user namespace root requires. To
  do this requires the ipc namespace to be known at open time"

* tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  ipc: Remove extra braces
  ipc: Check permissions for checkpoint_restart sysctls at open time
  ipc: Remove extra1 field abuse to pass ipc namespace
  ipc: Use the same namespace to modify and validate
  ipc: Store ipc sysctls in the ipc namespace
  ipc: Store mqueue sysctls in the ipc namespace
  • Loading branch information
torvalds committed Jun 3, 2022
2 parents 07953c5 + 38cd5b1 commit 1888e9b
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 145 deletions.
37 changes: 33 additions & 4 deletions include/linux/ipc_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/ns_common.h>
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>

struct user_namespace;

Expand Down Expand Up @@ -63,6 +64,12 @@ struct ipc_namespace {
unsigned int mq_msg_default;
unsigned int mq_msgsize_default;

struct ctl_table_set mq_set;
struct ctl_table_header *mq_sysctls;

struct ctl_table_set ipc_set;
struct ctl_table_header *ipc_sysctls;

/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
struct ucounts *ucounts;
Expand Down Expand Up @@ -169,15 +176,37 @@ static inline void put_ipc_ns(struct ipc_namespace *ns)

#ifdef CONFIG_POSIX_MQUEUE_SYSCTL

struct ctl_table_header;
extern struct ctl_table_header *mq_register_sysctl_table(void);
void retire_mq_sysctls(struct ipc_namespace *ns);
bool setup_mq_sysctls(struct ipc_namespace *ns);

#else /* CONFIG_POSIX_MQUEUE_SYSCTL */

static inline struct ctl_table_header *mq_register_sysctl_table(void)
static inline void retire_mq_sysctls(struct ipc_namespace *ns)
{
return NULL;
}

/*
 * Stub for builds without CONFIG_POSIX_MQUEUE_SYSCTL: there is nothing to
 * register, so always report success to the caller.
 */
static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
{
return true;
}

#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */

#ifdef CONFIG_SYSVIPC_SYSCTL

/*
 * Per-namespace SysV ipc sysctl registration.
 * setup_ipc_sysctls() returns true on success and false on failure;
 * retire_ipc_sysctls() undoes a successful setup.
 */
bool setup_ipc_sysctls(struct ipc_namespace *ns);
void retire_ipc_sysctls(struct ipc_namespace *ns);

#else /* CONFIG_SYSVIPC_SYSCTL */

/* No sysctls were registered, so there is nothing to tear down. */
static inline void retire_ipc_sysctls(struct ipc_namespace *ns)
{
}

/* Nothing to register when the option is off; always report success. */
static inline bool setup_ipc_sysctls(struct ipc_namespace *ns)
{
return true;
}

#endif /* CONFIG_SYSVIPC_SYSCTL */
#endif
205 changes: 125 additions & 80 deletions ipc/ipc_sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,17 @@
#include <linux/capability.h>
#include <linux/ipc_namespace.h>
#include <linux/msg.h>
#include <linux/slab.h>
#include "util.h"

/*
 * Map a table entry's data pointer, which points at a field of init_ipc_ns,
 * onto the same field of the calling task's ipc namespace.
 */
static void *get_ipc(struct ctl_table *table)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	char *field_in_init = table->data;

	/* Byte offset of the field inside init_ipc_ns, re-applied to @ns. */
	return (field_in_init - (char *)&init_ipc_ns) + (char *)ns;
}

/*
 * proc_dointvec() wrapper: operates on a private copy of @table whose data
 * pointer has been redirected to the caller's ipc namespace via get_ipc().
 */
static int proc_ipc_dointvec(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Work on a copy so the shared table entry itself is never modified. */
	struct ctl_table ns_table = *table;

	ns_table.data = get_ipc(table);
	return proc_dointvec(&ns_table, write, buffer, lenp, ppos);
}

/*
 * proc_dointvec_minmax() wrapper: operates on a private copy of @table whose
 * data pointer has been redirected to the caller's ipc namespace.
 */
static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Work on a copy so the shared table entry itself is never modified. */
	struct ctl_table ns_table = *table;

	ns_table.data = get_ipc(table);
	return proc_dointvec_minmax(&ns_table, write, buffer, lenp, ppos);
}

static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ipc_namespace *ns = current->nsproxy->ipc_ns;
int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
struct ipc_namespace *ns =
container_of(table->data, struct ipc_namespace, shm_rmid_forced);
int err;

err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

if (err < 0)
return err;
Expand All @@ -58,17 +32,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
return err;
}

/*
 * proc_doulongvec_minmax() wrapper: operates on a private copy of @table
 * whose data pointer has been redirected to the caller's ipc namespace.
 */
static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Work on a copy so the shared table entry itself is never modified. */
	struct ctl_table ns_table = *table;

	ns_table.data = get_ipc(table);
	return proc_doulongvec_minmax(&ns_table, write, buffer, lenp, ppos);
}

static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
Expand All @@ -87,14 +50,15 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ipc_namespace *ns =
container_of(table->data, struct ipc_namespace, sem_ctls);
int ret, semmni;
struct ipc_namespace *ns = current->nsproxy->ipc_ns;

semmni = ns->sem_ctls[3];
ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos);
ret = proc_dointvec(table, write, buffer, lenp, ppos);

if (!ret)
ret = sem_check_semmni(current->nsproxy->ipc_ns);
ret = sem_check_semmni(ns);

/*
* Reset the semmni value if an error happens.
Expand All @@ -104,44 +68,31 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
return ret;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * Handler for the *_next_id sysctls: reads are always allowed, writes are
 * refused with -EPERM unless checkpoint_restore_ns_capable() accepts the
 * user namespace that owns the caller's ipc namespace.
 */
static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	struct user_namespace *owner = current->nsproxy->ipc_ns->user_ns;
	/* The capability is only consulted for writes. */
	bool allowed = !write || checkpoint_restore_ns_capable(owner);

	if (!allowed)
		return -EPERM;

	return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

/* Referenced as the .extra2 upper bound of the shmmni and msgmni entries. */
int ipc_mni = IPCMNI;
/* NOTE(review): not used in the visible part of this file; presumably consumed by other ipc code - confirm. */
int ipc_mni_shift = IPCMNI_SHIFT;
/* NOTE(review): not used in the visible part of this file; presumably consumed by other ipc code - confirm. */
int ipc_min_cycle = RADIX_TREE_MAP_SIZE;

static struct ctl_table ipc_kern_table[] = {
static struct ctl_table ipc_sysctls[] = {
{
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof(init_ipc_ns.shm_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof(init_ipc_ns.shm_ctlall),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof(init_ipc_ns.shm_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
},
Expand All @@ -159,7 +110,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof(init_ipc_ns.msg_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
Expand All @@ -168,7 +119,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof(init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
},
Expand All @@ -186,7 +137,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof(init_ipc_ns.msg_ctlmnb),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
Expand All @@ -202,45 +153,139 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "sem_next_id",
.data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
.mode = 0666,
.proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
.mode = 0444,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
{
.procname = "msg_next_id",
.data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
.mode = 0666,
.proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
.mode = 0444,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
{
.procname = "shm_next_id",
.data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
.mode = 0666,
.proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
.mode = 0444,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
#endif
{}
};

static struct ctl_table ipc_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = ipc_kern_table,
},
{}
static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
{
return &current->nsproxy->ipc_ns->ipc_set;
}

/* Return nonzero only when @set is the sysctl set of the caller's own ipc namespace. */
static int set_is_seen(struct ctl_table_set *set)
{
	struct ctl_table_set *own_set = &current->nsproxy->ipc_ns->ipc_set;

	return own_set == set;
}

/*
 * .permissions callback of set_root.
 *
 * Returns the mode recorded in @table, except that with
 * CONFIG_CHECKPOINT_RESTORE the three *_next_id entries of the caller's ipc
 * namespace are reported as 0666 when checkpoint_restore_ns_capable()
 * accepts the namespace's owning user namespace.
 */
static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
{
#ifdef CONFIG_CHECKPOINT_RESTORE
	struct ipc_namespace *caller_ns = current->nsproxy->ipc_ns;
	const void *target = table->data;
	bool is_next_id =
		target == &caller_ns->ids[IPC_SEM_IDS].next_id ||
		target == &caller_ns->ids[IPC_MSG_IDS].next_id ||
		target == &caller_ns->ids[IPC_SHM_IDS].next_id;

	/* The capability check runs only for the next_id entries. */
	if (is_next_id && checkpoint_restore_ns_capable(caller_ns->user_ns))
		return 0666;
#endif
	return table->mode;
}

/* Sysctl root shared by every ipc namespace's table set; wires in the two callbacks above. */
static struct ctl_table_root set_root = {
	.permissions	= ipc_permissions,
	.lookup		= set_lookup,
};

bool setup_ipc_sysctls(struct ipc_namespace *ns)
{
struct ctl_table *tbl;

setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen);

tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL);
if (tbl) {
int i;

for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) {
if (tbl[i].data == &init_ipc_ns.shm_ctlmax)
tbl[i].data = &ns->shm_ctlmax;

else if (tbl[i].data == &init_ipc_ns.shm_ctlall)
tbl[i].data = &ns->shm_ctlall;

else if (tbl[i].data == &init_ipc_ns.shm_ctlmni)
tbl[i].data = &ns->shm_ctlmni;

else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced)
tbl[i].data = &ns->shm_rmid_forced;

else if (tbl[i].data == &init_ipc_ns.msg_ctlmax)
tbl[i].data = &ns->msg_ctlmax;

else if (tbl[i].data == &init_ipc_ns.msg_ctlmni)
tbl[i].data = &ns->msg_ctlmni;

else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb)
tbl[i].data = &ns->msg_ctlmnb;

else if (tbl[i].data == &init_ipc_ns.sem_ctls)
tbl[i].data = &ns->sem_ctls;
#ifdef CONFIG_CHECKPOINT_RESTORE
else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id)
tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;

else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id)
tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;

else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id)
tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
#endif
else
tbl[i].data = NULL;
}

ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl);
}
if (!ns->ipc_sysctls) {
kfree(tbl);
retire_sysctl_set(&ns->ipc_set);
return false;
}

return true;
}

/*
 * retire_ipc_sysctls - undo setup_ipc_sysctls() for @ns
 *
 * Unregisters the namespace's sysctl table, retires its sysctl set and frees
 * the duplicated table that setup_ipc_sysctls() allocated with kmemdup().
 */
void retire_ipc_sysctls(struct ipc_namespace *ns)
{
struct ctl_table *tbl;

/*
 * Fetch the table pointer from the header before unregistering.
 * NOTE(review): presumably the header must not be dereferenced once
 * unregister_sysctl_table() has run - confirm against proc_sysctl.c.
 */
tbl = ns->ipc_sysctls->ctl_table_arg;
unregister_sysctl_table(ns->ipc_sysctls);
retire_sysctl_set(&ns->ipc_set);
kfree(tbl);
}

static int __init ipc_sysctl_init(void)
{
register_sysctl_table(ipc_root_table);
if (!setup_ipc_sysctls(&init_ipc_ns)) {
pr_warn("ipc sysctl registration failed\n");
return -ENOMEM;
}
return 0;
}

Expand Down
Loading

0 comments on commit 1888e9b

Please sign in to comment.