Currently the sysctl table has to be dynamically allocated for every user namespace so that .data can be modified to point at the per-namespace parameter. The new sysctl API allows us to get rid of the need for such modification. Signed-off-by: Alexey Gladkov <legion@xxxxxxxxxx> --- include/linux/user_namespace.h | 6 -- kernel/ucount.c | 116 +++++++++++++-------------------- kernel/user_namespace.c | 10 +-- 3 files changed, 46 insertions(+), 86 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..7b134516e5cb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -95,10 +95,6 @@ struct user_namespace { struct key *persistent_keyring_register; #endif struct work_struct work; -#ifdef CONFIG_SYSCTL - struct ctl_table_set set; - struct ctl_table_header *sysctls; -#endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; @@ -116,8 +112,6 @@ struct ucounts { extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; -bool setup_userns_sysctls(struct user_namespace *ns); -void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); diff --git a/kernel/ucount.c b/kernel/ucount.c index ee8e57fd6f90..4a5072671847 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -7,6 +7,7 @@ #include <linux/hash.h> #include <linux/kmemleak.h> #include <linux/user_namespace.h> +#include <linux/fs.h> struct ucounts init_ucounts = { .ns = &init_user_ns, @@ -26,38 +27,20 @@ static DEFINE_SPINLOCK(ucounts_lock); #ifdef CONFIG_SYSCTL -static struct ctl_table_set * -set_lookup(struct ctl_table_root *root) -{ - return &current_user_ns()->set; -} - -static int set_is_seen(struct ctl_table_set *set) -{ - return &current_user_ns()->set == set; -} - -static int set_permissions(struct ctl_table_header *head, 
- struct ctl_table *table) -{ - struct user_namespace *user_ns = - container_of(head->set, struct user_namespace, set); - int mode; - - /* Allow users with CAP_SYS_RESOURCE unrestrained access */ - if (ns_capable(user_ns, CAP_SYS_RESOURCE)) - mode = (table->mode & S_IRWXU) >> 6; - else - /* Allow all others at most read-only access */ - mode = table->mode & S_IROTH; - return (mode << 6) | (mode << 3) | mode; -} - -static struct ctl_table_root set_root = { - .lookup = set_lookup, - .permissions = set_permissions, +static int user_sys_open(struct ctl_context *ctx, struct inode *inode, + struct file *file); +static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos); +static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos); + +static struct ctl_fops user_sys_fops = { + .open = user_sys_open, + .read = user_sys_read, + .write = user_sys_write, }; +static long ue_dummy = 0; static long ue_zero = 0; static long ue_int_max = INT_MAX; @@ -66,9 +49,11 @@ static long ue_int_max = INT_MAX; .procname = name, \ .maxlen = sizeof(long), \ .mode = 0644, \ + .data = &ue_dummy, \ .proc_handler = proc_doulongvec_minmax, \ .extra1 = &ue_zero, \ .extra2 = &ue_int_max, \ + .ctl_fops = &user_sys_fops, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), @@ -89,44 +74,43 @@ static struct ctl_table user_table[] = { #endif { } }; -#endif /* CONFIG_SYSCTL */ -bool setup_userns_sysctls(struct user_namespace *ns) +static int user_sys_open(struct ctl_context *ctx, struct inode *inode, struct file *file) { -#ifdef CONFIG_SYSCTL - struct ctl_table *tbl; - - BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1); - setup_sysctl_set(&ns->set, &set_root, set_is_seen); - tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); - if (tbl) { - int i; - for (i = 0; i < UCOUNT_COUNTS; i++) { - tbl[i].data = &ns->ucount_max[i]; - } - ns->sysctls = 
__register_sysctl_table(&ns->set, "user", tbl); - } - if (!ns->sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->set); - return false; - } -#endif - return true; + /* Allow users with CAP_SYS_RESOURCE unrestrained access */ + if ((file->f_mode & FMODE_WRITE) && + !ns_capable(file->f_cred->user_ns, CAP_SYS_RESOURCE)) + return -EPERM; + return 0; } -void retire_userns_sysctls(struct user_namespace *ns) +static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) { -#ifdef CONFIG_SYSCTL - struct ctl_table *tbl; + struct ctl_table table = *ctx->table; + table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table]; + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} - tbl = ns->sysctls->ctl_table_arg; - unregister_sysctl_table(ns->sysctls); - retire_sysctl_set(&ns->set); - kfree(tbl); -#endif +static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table = *ctx->table; + table.data = &file->f_cred->user_ns->ucount_max[ctx->table - user_table]; + return table.proc_handler(&table, 1, buffer, lenp, ppos); } +static struct ctl_table user_root_table[] = { + { + .procname = "user", + .mode = 0555, + .child = user_table, + }, + {} +}; + +#endif /* CONFIG_SYSCTL */ + static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) { struct ucounts *ucounts; @@ -357,17 +341,7 @@ bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigne static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL - static struct ctl_table_header *user_header; - static struct ctl_table empty[1]; - /* - * It is necessary to register the user directory in the - * default set so that registrations in the child sets work - * properly. 
- */ - user_header = register_sysctl("user", empty); - kmemleak_ignore(user_header); - BUG_ON(!user_header); - BUG_ON(!setup_userns_sysctls(&init_user_ns)); + register_sysctl_table(user_root_table); #endif hlist_add_ucounts(&init_ucounts); inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 981bb2d10d83..c0e707bc9a31 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -149,17 +149,10 @@ int create_user_ns(struct cred *new) INIT_LIST_HEAD(&ns->keyring_name_list); init_rwsem(&ns->keyring_sem); #endif - ret = -ENOMEM; - if (!setup_userns_sysctls(ns)) - goto fail_keyring; set_cred_user_ns(new, ns); return 0; -fail_keyring: -#ifdef CONFIG_PERSISTENT_KEYRINGS - key_put(ns->persistent_keyring_register); -#endif - ns_free_inum(&ns->ns); + fail_free: kmem_cache_free(user_ns_cachep, ns); fail_dec: @@ -208,7 +201,6 @@ static void free_user_ns(struct work_struct *work) kfree(ns->projid_map.forward); kfree(ns->projid_map.reverse); } - retire_userns_sysctls(ns); key_free_user_ns(ns); ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); -- 2.33.3