Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/ebiederm/user-namespace

Pull user namespace rlimit handling update from Eric Biederman:
 "This is the work mainly by Alexey Gladkov to limit rlimits to the
  rlimits of the user that created a user namespace, and to allow users
  to have stricter limits on the resources created within a user
  namespace."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  cred: add missing return error code when set_cred_ucounts() failed
  ucounts: Silence warning in dec_rlimit_ucounts
  ucounts: Set ucount_max to the largest positive value the type can hold
  kselftests: Add test to check for rlimit changes in different user namespaces
  Reimplement RLIMIT_MEMLOCK on top of ucounts
  Reimplement RLIMIT_SIGPENDING on top of ucounts
  Reimplement RLIMIT_MSGQUEUE on top of ucounts
  Reimplement RLIMIT_NPROC on top of ucounts
  Use atomic_t for ucounts reference counting
  Add a reference to ucounts for each cred
  Increase size of ucounts to atomic_long_t
  • Loading branch information
torvalds committed Jun 29, 2021
2 parents e17c120 + 5e6b8a5 commit c54b245
Show file tree
Hide file tree
Showing 29 changed files with 468 additions and 127 deletions.
6 changes: 5 additions & 1 deletion fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,10 @@ int begin_new_exec(struct linux_binprm * bprm)
WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
flush_signal_handlers(me, 0);

retval = set_cred_ucounts(bprm->cred);
if (retval < 0)
goto out_unlock;

/*
* install the new credentials for this executable
*/
Expand Down Expand Up @@ -1874,7 +1878,7 @@ static int do_execveat_common(int fd, struct filename *filename,
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
retval = -EAGAIN;
goto out_ret;
}
Expand Down
16 changes: 8 additions & 8 deletions fs/hugetlbfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1446,7 +1446,7 @@ static int get_hstate_idx(int page_size_log)
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
*/
struct file *hugetlb_file_setup(const char *name, size_t size,
vm_flags_t acctflag, struct user_struct **user,
vm_flags_t acctflag, struct ucounts **ucounts,
int creat_flags, int page_size_log)
{
struct inode *inode;
Expand All @@ -1458,20 +1458,20 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
if (hstate_idx < 0)
return ERR_PTR(-ENODEV);

*user = NULL;
*ucounts = NULL;
mnt = hugetlbfs_vfsmount[hstate_idx];
if (!mnt)
return ERR_PTR(-ENOENT);

if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
*user = current_user();
if (user_shm_lock(size, *user)) {
*ucounts = current_ucounts();
if (user_shm_lock(size, *ucounts)) {
task_lock(current);
pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
current->comm, current->pid);
task_unlock(current);
} else {
*user = NULL;
*ucounts = NULL;
return ERR_PTR(-EPERM);
}
}
Expand All @@ -1498,9 +1498,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size,

iput(inode);
out:
if (*user) {
user_shm_unlock(size, *user);
*user = NULL;
if (*ucounts) {
user_shm_unlock(size, *ucounts);
*ucounts = NULL;
}
return file;
}
Expand Down
2 changes: 1 addition & 1 deletion fs/proc/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
collect_sigign_sigcatch(p, &ignored, &caught);
num_threads = get_nr_threads(p);
rcu_read_lock(); /* FIXME: is this correct? */
qsize = atomic_read(&__task_cred(p)->user->sigpending);
qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
rcu_read_unlock();
qlim = task_rlimit(p, RLIMIT_SIGPENDING);
unlock_task_sighand(p, &flags);
Expand Down
4 changes: 4 additions & 0 deletions include/linux/cred.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ struct cred {
#endif
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct ucounts *ucounts;
struct group_info *group_info; /* supplementary groups for euid/fsgid */
/* RCU deletion */
union {
Expand All @@ -169,6 +170,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
extern int set_create_files_as(struct cred *, struct inode *);
extern int cred_fscmp(const struct cred *, const struct cred *);
extern void __init cred_init(void);
extern int set_cred_ucounts(struct cred *);

/*
* check for validity of credentials
Expand Down Expand Up @@ -369,6 +371,7 @@ static inline void put_cred(const struct cred *_cred)

#define task_uid(task) (task_cred_xxx((task), uid))
#define task_euid(task) (task_cred_xxx((task), euid))
#define task_ucounts(task) (task_cred_xxx((task), ucounts))

#define current_cred_xxx(xxx) \
({ \
Expand All @@ -385,6 +388,7 @@ static inline void put_cred(const struct cred *_cred)
#define current_fsgid() (current_cred_xxx(fsgid))
#define current_cap() (current_cred_xxx(cap_effective))
#define current_user() (current_cred_xxx(user))
#define current_ucounts() (current_cred_xxx(ucounts))

extern struct user_namespace init_user_ns;
#ifdef CONFIG_USER_NS
Expand Down
4 changes: 2 additions & 2 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct user_struct **user, int creat_flags,
struct ucounts **ucounts, int creat_flags,
int page_size_log);

static inline bool is_file_hugepages(struct file *file)
Expand All @@ -471,7 +471,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
#define is_file_hugepages(file) false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
struct user_struct **user, int creat_flags,
struct ucounts **ucounts, int creat_flags,
int page_size_log)
{
return ERR_PTR(-ENOSYS);
Expand Down
4 changes: 2 additions & 2 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1709,8 +1709,8 @@ extern bool can_do_mlock(void);
#else
static inline bool can_do_mlock(void) { return false; }
#endif
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
extern int user_shm_lock(size_t, struct ucounts *);
extern void user_shm_unlock(size_t, struct ucounts *);

/*
* Parameter block passed down to zap_pte_range in exceptional cases.
Expand Down
7 changes: 0 additions & 7 deletions include/linux/sched/user.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,9 @@
*/
struct user_struct {
refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_EPOLL
atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
#endif
#ifdef CONFIG_POSIX_MQUEUE
/* protected by mq_lock */
unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
#endif
unsigned long locked_shm; /* How many pages of mlocked shm ? */
unsigned long unix_inflight; /* How many files in flight in unix sockets */
atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */

Expand Down
2 changes: 1 addition & 1 deletion include/linux/shmem_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
extern int shmem_zero_setup(struct vm_area_struct *);
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
#ifdef CONFIG_SHMEM
extern const struct address_space_operations shmem_aops;
static inline bool shmem_mapping(struct address_space *mapping)
Expand Down
4 changes: 3 additions & 1 deletion include/linux/signal_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ typedef struct kernel_siginfo {
__SIGINFO;
} kernel_siginfo_t;

struct ucounts;

/*
* Real Time signals may be queued.
*/
Expand All @@ -21,7 +23,7 @@ struct sigqueue {
struct list_head list;
int flags;
kernel_siginfo_t info;
struct user_struct *user;
struct ucounts *ucounts;
};

/* flags values. */
Expand Down
31 changes: 28 additions & 3 deletions include/linux/user_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,15 @@ enum ucount_type {
UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS,
#endif
UCOUNT_RLIMIT_NPROC,
UCOUNT_RLIMIT_MSGQUEUE,
UCOUNT_RLIMIT_SIGPENDING,
UCOUNT_RLIMIT_MEMLOCK,
UCOUNT_COUNTS,
};

#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC

struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
Expand Down Expand Up @@ -92,23 +98,42 @@ struct user_namespace {
struct ctl_table_header *sysctls;
#endif
struct ucounts *ucounts;
int ucount_max[UCOUNT_COUNTS];
long ucount_max[UCOUNT_COUNTS];
} __randomize_layout;

struct ucounts {
struct hlist_node node;
struct user_namespace *ns;
kuid_t uid;
int count;
atomic_t ucount[UCOUNT_COUNTS];
atomic_t count;
atomic_long_t ucount[UCOUNT_COUNTS];
};

extern struct user_namespace init_user_ns;
extern struct ucounts init_ucounts;

bool setup_userns_sysctls(struct user_namespace *ns);
void retire_userns_sysctls(struct user_namespace *ns);
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
void put_ucounts(struct ucounts *ucounts);

static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type)
{
return atomic_long_read(&ucounts->ucount[type]);
}

long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);

static inline void set_rlimit_ucount_max(struct user_namespace *ns,
enum ucount_type type, unsigned long max)
{
ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX;
}

#ifdef CONFIG_USER_NS

Expand Down
40 changes: 21 additions & 19 deletions ipc/mqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ struct mqueue_inode_info {
struct pid *notify_owner;
u32 notify_self_exec_id;
struct user_namespace *notify_user_ns;
struct user_struct *user; /* user who created, for accounting */
struct ucounts *ucounts; /* user who created, for accounting */
struct sock *notify_sock;
struct sk_buff *notify_cookie;

Expand Down Expand Up @@ -292,7 +292,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
struct ipc_namespace *ipc_ns, umode_t mode,
struct mq_attr *attr)
{
struct user_struct *u = current_user();
struct inode *inode;
int ret = -ENOMEM;

Expand Down Expand Up @@ -321,7 +320,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->notify_owner = NULL;
info->notify_user_ns = NULL;
info->qsize = 0;
info->user = NULL; /* set when all is ok */
info->ucounts = NULL; /* set when all is ok */
info->msg_tree = RB_ROOT;
info->msg_tree_rightmost = NULL;
info->node_cache = NULL;
Expand Down Expand Up @@ -371,19 +370,23 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
if (mq_bytes + mq_treesize < mq_bytes)
goto out_inode;
mq_bytes += mq_treesize;
spin_lock(&mq_lock);
if (u->mq_bytes + mq_bytes < u->mq_bytes ||
u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
info->ucounts = get_ucounts(current_ucounts());
if (info->ucounts) {
long msgqueue;

spin_lock(&mq_lock);
msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
spin_unlock(&mq_lock);
put_ucounts(info->ucounts);
info->ucounts = NULL;
/* mqueue_evict_inode() releases info->messages */
ret = -EMFILE;
goto out_inode;
}
spin_unlock(&mq_lock);
/* mqueue_evict_inode() releases info->messages */
ret = -EMFILE;
goto out_inode;
}
u->mq_bytes += mq_bytes;
spin_unlock(&mq_lock);

/* all is ok */
info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
inc_nlink(inode);
/* Some things misbehave if size == 0 on a directory */
Expand Down Expand Up @@ -497,7 +500,6 @@ static void mqueue_free_inode(struct inode *inode)
static void mqueue_evict_inode(struct inode *inode)
{
struct mqueue_inode_info *info;
struct user_struct *user;
struct ipc_namespace *ipc_ns;
struct msg_msg *msg, *nmsg;
LIST_HEAD(tmp_msg);
Expand All @@ -520,8 +522,7 @@ static void mqueue_evict_inode(struct inode *inode)
free_msg(msg);
}

user = info->user;
if (user) {
if (info->ucounts) {
unsigned long mq_bytes, mq_treesize;

/* Total amount of bytes accounted for the mqueue */
Expand All @@ -533,7 +534,7 @@ static void mqueue_evict_inode(struct inode *inode)
info->attr.mq_msgsize);

spin_lock(&mq_lock);
user->mq_bytes -= mq_bytes;
dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
/*
* get_ns_from_inode() ensures that the
* (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
Expand All @@ -543,7 +544,8 @@ static void mqueue_evict_inode(struct inode *inode)
if (ipc_ns)
ipc_ns->mq_queues_count--;
spin_unlock(&mq_lock);
free_uid(user);
put_ucounts(info->ucounts);
info->ucounts = NULL;
}
if (ipc_ns)
put_ipc_ns(ipc_ns);
Expand Down
Loading

0 comments on commit c54b245

Please sign in to comment.