To count the number of user processes, use the counter bound to the user in the user namespace. Signed-off-by: Alexey Gladkov <gladkov.alexey@xxxxxxxxx> --- fs/exec.c | 7 ++++--- fs/io-wq.c | 14 +++++++++----- include/linux/sched/user.h | 1 - include/linux/user_namespace.h | 8 ++++++++ kernel/cred.c | 8 ++++---- kernel/exit.c | 2 +- kernel/fork.c | 9 +++++---- kernel/sys.c | 6 ++++-- kernel/ucount.c | 34 ++++++++++++++++++++++++++++++++++ kernel/user.c | 3 ++- 10 files changed, 71 insertions(+), 21 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index e6e8a9a70327..3f2071f7b9c7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1826,19 +1826,20 @@ static int __do_execve_file(int fd, struct filename *filename, char *pathbuf = NULL; struct linux_binprm *bprm; struct files_struct *displaced; - int retval; + int retval, processes; if (IS_ERR(filename)) return PTR_ERR(filename); + processes = get_rlimit_counter(&init_user_ns, current_euid(), UCOUNT_RLIMIT_NPROC); + /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs * don't check setuid() return code. Here we additionally recheck * whether NPROC limit is still exceeded. 
*/ - if ((current->flags & PF_NPROC_EXCEEDED) && - atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) { + if ((current->flags & PF_NPROC_EXCEEDED) && processes > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 47c5f3aeb460..6170aee986db 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -17,6 +17,7 @@ #include <linux/rculist_nulls.h> #include <linux/fs_struct.h> #include <linux/task_work.h> +#include <linux/user_namespace.h> #include "io-wq.h" @@ -216,7 +217,7 @@ static void io_worker_exit(struct io_worker *worker) if (worker->flags & IO_WORKER_F_RUNNING) atomic_dec(&acct->nr_running); if (!(worker->flags & IO_WORKER_F_BOUND)) - atomic_dec(&wqe->wq->user->processes); + dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); worker->flags = 0; preempt_enable(); @@ -349,12 +350,12 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, worker->flags |= IO_WORKER_F_BOUND; wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--; wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++; - atomic_dec(&wqe->wq->user->processes); + dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); } else { worker->flags &= ~IO_WORKER_F_BOUND; wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++; wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--; - atomic_inc(&wqe->wq->user->processes); + inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); } io_wqe_inc_running(wqe, worker); } @@ -671,7 +672,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) spin_unlock_irq(&wqe->lock); if (index == IO_WQ_ACCT_UNBOUND) - atomic_inc(&wq->user->processes); + inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC); wake_up_process(worker->task); return true; @@ -754,6 +755,7 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct, struct io_wq_work *work) { bool free_worker; + int processes; if (!(work->flags & IO_WQ_WORK_UNBOUND)) return true; @@ 
-766,7 +768,9 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct, if (free_worker) return true; - if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers && + processes = get_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); + + if (processes >= acct->max_workers && !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN))) return false; diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 917d88edb7b9..38e122bc3d07 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -12,7 +12,6 @@ */ struct user_struct { refcount_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index fc75af812d73..6d9d180b2c9d 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -50,9 +50,13 @@ enum ucount_type { UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, #endif + UCOUNT_RLIMIT_NPROC, UCOUNT_COUNTS, }; +#define UCOUNT_MIN_RLIMIT UCOUNT_RLIMIT_NPROC +#define UCOUNT_MAX_RLIMIT UCOUNT_RLIMIT_NPROC + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -104,6 +108,10 @@ void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); +long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type); +struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type); +void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type); + #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) diff --git a/kernel/cred.c b/kernel/cred.c 
index 421b1149c651..b6694700e760 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -351,7 +351,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) kdebug("share_creds(%p{%d,%d})", p->cred, atomic_read(&p->cred->usage), read_cred_subscribers(p->cred)); - atomic_inc(&p->cred->user->processes); + inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC); return 0; } @@ -384,7 +384,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) } #endif - atomic_inc(&new->user->processes); + inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC); p->cred = p->real_cred = get_cred(new); alter_cred_subscribers(new, 2); validate_creds(new); @@ -486,11 +486,11 @@ int commit_creds(struct cred *new) */ alter_cred_subscribers(new, 2); if (new->user != old->user) - atomic_inc(&new->user->processes); + inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) - atomic_dec(&old->user->processes); + dec_rlimit_counter(&init_user_ns, old->euid, UCOUNT_RLIMIT_NPROC); alter_cred_subscribers(old, -2); /* send notifications */ diff --git a/kernel/exit.c b/kernel/exit.c index 727150f28103..5a0d7dd1ad64 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -187,7 +187,7 @@ void release_task(struct task_struct *p) /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. 
But shut RCU-lockdep up */ rcu_read_lock(); - atomic_dec(&__task_cred(p)->user->processes); + dec_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC); rcu_read_unlock(); cgroup_release(p); diff --git a/kernel/fork.c b/kernel/fork.c index efc5493203ae..2bc8bd45179f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1844,7 +1844,7 @@ static __latent_entropy struct task_struct *copy_process( int node, struct kernel_clone_args *args) { - int pidfd = -1, retval; + int pidfd = -1, retval, processes; struct task_struct *p; struct multiprocess_signals delayed; struct file *pidfile = NULL; @@ -1958,9 +1958,10 @@ static __latent_entropy struct task_struct *copy_process( DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid, + UCOUNT_RLIMIT_NPROC); retval = -EAGAIN; - if (atomic_read(&p->real_cred->user->processes) >= - task_rlimit(p, RLIMIT_NPROC)) { + if (processes >= task_rlimit(p, RLIMIT_NPROC)) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; @@ -2361,7 +2362,7 @@ static __latent_entropy struct task_struct *copy_process( #endif delayacct_tsk_free(p); bad_fork_cleanup_count: - atomic_dec(&p->cred->user->processes); + dec_rlimit_counter(&init_user_ns, p->cred->euid, UCOUNT_RLIMIT_NPROC); exit_creds(p); bad_fork_free: p->state = TASK_DEAD; diff --git a/kernel/sys.c b/kernel/sys.c index 00a96746e28a..db780ec32d86 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -461,11 +461,14 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) static int set_user(struct cred *new) { struct user_struct *new_user; + int processes; new_user = alloc_uid(new->uid); if (!new_user) return -EAGAIN; + processes = get_rlimit_counter(&init_user_ns, new_user->uid, UCOUNT_RLIMIT_NPROC); + /* * We don't fail in case of NPROC limit excess here because too many * poorly written programs don't check set*uid() return code, assuming @@ 
-473,8 +476,7 @@ static int set_user(struct cred *new) * for programs doing set*uid()+execve() by harmlessly deferring the * failure to the execve() stage. */ - if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && - new_user != INIT_USER) + if (processes >= rlimit(RLIMIT_NPROC) && new_user != INIT_USER) current->flags |= PF_NPROC_EXCEEDED; else current->flags &= ~PF_NPROC_EXCEEDED; diff --git a/kernel/ucount.c b/kernel/ucount.c index 7b2bca8582ef..e00d644e4ca5 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -74,6 +74,7 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), #endif + { }, { } }; #endif /* CONFIG_SYSCTL */ @@ -222,6 +223,39 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type) put_ucounts(ucounts); } +long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type) +{ + long v; + struct ucounts *ucounts = get_ucounts(ns, uid); + if (!ucounts) + return LONG_MAX; + v = atomic_long_read(&ucounts->ucount[type]); + put_ucounts(ucounts); + return v; +} + +struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid, + enum ucount_type type) +{ + if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT) + return NULL; + + return inc_ucount(ns, uid, type); +} + +void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type) +{ + struct ucounts *ucounts; + + if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT) + return; + + ucounts = get_ucounts(ns, uid); + + if (ucounts) + dec_ucount(ucounts, type); +} + static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL diff --git a/kernel/user.c b/kernel/user.c index b1635d94a1f2..5bb75ebdef4f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { .__count = REFCOUNT_INIT(1), - .processes = ATOMIC_INIT(1), .sigpending = 
ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, @@ -224,6 +223,8 @@ static int __init uid_cache_init(void) uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID)); spin_unlock_irq(&uidhash_lock); + inc_rlimit_counter(&init_user_ns, GLOBAL_ROOT_UID, UCOUNT_RLIMIT_NPROC); + return 0; } subsys_initcall(uid_cache_init); -- 2.25.4