Re: [PATCH v2] inotify: Convert to using per-namespace limits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 11-10-16 10:36:22, Nikolay Borisov wrote:
> This patchset converts inotify to using the newly introduced
> per-userns sysctl infrastructure.
> 
> Currently the inotify instances/watches are being accounted in the
> user_struct structure. This means that in setups where multiple
> users in unprivileged containers map to the same underlying
> real user (i.e. pointing to the same user_struct) the inotify limits
> are going to be shared as well, allowing one user (or application) to exhaust
> all others' limits.
> 
> Fix this by switching the inotify sysctls to using the
> per-namespace/per-user limits. This will allow the server admin to
> set sensible global limits, which can further be tuned inside every
> individual user namespace. Additionally, in order to preserve the
> sysctl ABI, make the existing inotify instances/watches sysctls
> modify the values of the initial user namespace.
> 
> Signed-off-by: Nikolay Borisov <kernel@xxxxxxxx>

I cannot really comment on the user-namespace sanity of this, but from the fs
notification framework POV the patch looks fine, so feel free to add:

Acked-by: Jan Kara <jack@xxxxxxx>

								Honza

> ---
> 
> So here is a revised version which retains the existing sysctls,
> and hooks them to the init_user_ns values. 
> 
>  fs/notify/inotify/inotify.h          | 17 +++++++++++++++++
>  fs/notify/inotify/inotify_fsnotify.c |  6 ++----
>  fs/notify/inotify/inotify_user.c     | 34 +++++++++++++++++-----------------
>  include/linux/fsnotify_backend.h     |  3 ++-
>  include/linux/sched.h                |  4 ----
>  include/linux/user_namespace.h       |  4 ++++
>  kernel/ucount.c                      |  6 +++++-
>  7 files changed, 47 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
> index ed855ef6f077..b5536f8ad3e0 100644
> --- a/fs/notify/inotify/inotify.h
> +++ b/fs/notify/inotify/inotify.h
> @@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group,
>  				const unsigned char *file_name, u32 cookie);
>  
>  extern const struct fsnotify_ops inotify_fsnotify_ops;
> +
> +#ifdef CONFIG_INOTIFY_USER
> +static inline void dec_inotify_instances(struct ucounts *ucounts)
> +{
> +	dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
> +}
> +
> +static inline struct ucounts *inc_inotify_watches(struct ucounts *ucounts)
> +{
> +	return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES);
> +}
> +
> +static inline void dec_inotify_watches(struct ucounts *ucounts)
> +{
> +	dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES);
> +}
> +#endif
> diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
> index 2cd900c2c737..1e6b3cfc2cfd 100644
> --- a/fs/notify/inotify/inotify_fsnotify.c
> +++ b/fs/notify/inotify/inotify_fsnotify.c
> @@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
>  	/* ideally the idr is empty and we won't hit the BUG in the callback */
>  	idr_for_each(&group->inotify_data.idr, idr_callback, group);
>  	idr_destroy(&group->inotify_data.idr);
> -	if (group->inotify_data.user) {
> -		atomic_dec(&group->inotify_data.user->inotify_devs);
> -		free_uid(group->inotify_data.user);
> -	}
> +	if (group->inotify_data.ucounts)
> +		dec_inotify_instances(group->inotify_data.ucounts);
>  }
>  
>  static void inotify_free_event(struct fsnotify_event *fsn_event)
> diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
> index b8d08d0d0a4d..7d769a824742 100644
> --- a/fs/notify/inotify/inotify_user.c
> +++ b/fs/notify/inotify/inotify_user.c
> @@ -44,10 +44,8 @@
>  
>  #include <asm/ioctls.h>
>  
> -/* these are configurable via /proc/sys/fs/inotify/ */
> -static int inotify_max_user_instances __read_mostly;
> +/* configurable via /proc/sys/fs/inotify/ */
>  static int inotify_max_queued_events __read_mostly;
> -static int inotify_max_user_watches __read_mostly;
>  
>  static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
>  
> @@ -60,7 +58,7 @@ static int zero;
>  struct ctl_table inotify_table[] = {
>  	{
>  		.procname	= "max_user_instances",
> -		.data		= &inotify_max_user_instances,
> +		.data		= &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec_minmax,
> @@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = {
>  	},
>  	{
>  		.procname	= "max_user_watches",
> -		.data		= &inotify_max_user_watches,
> +		.data		= &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec_minmax,
> @@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
>  	/* remove this mark from the idr */
>  	inotify_remove_from_idr(group, i_mark);
>  
> -	atomic_dec(&group->inotify_data.user->inotify_watches);
> +	dec_inotify_watches(group->inotify_data.ucounts);
>  }
>  
>  /* ding dong the mark is dead */
> @@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
>  	tmp_i_mark->fsn_mark.mask = mask;
>  	tmp_i_mark->wd = -1;
>  
> -	ret = -ENOSPC;
> -	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
> -		goto out_err;
> -
>  	ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
>  	if (ret)
>  		goto out_err;
>  
> +	/* increment the number of watches the user has */
> +	if (!inc_inotify_watches(group->inotify_data.ucounts)) {
> +		inotify_remove_from_idr(group, tmp_i_mark);
> +		ret = -ENOSPC;
> +		goto out_err;
> +	}
> +
>  	/* we are on the idr, now get on the inode */
>  	ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
>  				       NULL, 0);
> @@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
>  		goto out_err;
>  	}
>  
> -	/* increment the number of watches the user has */
> -	atomic_inc(&group->inotify_data.user->inotify_watches);
>  
>  	/* return the watch descriptor for this new mark */
>  	ret = tmp_i_mark->wd;
> @@ -653,10 +652,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
>  
>  	spin_lock_init(&group->inotify_data.idr_lock);
>  	idr_init(&group->inotify_data.idr);
> -	group->inotify_data.user = get_current_user();
> +	group->inotify_data.ucounts = inc_ucount(current_user_ns(),
> +						 current_euid(),
> +						 UCOUNT_INOTIFY_INSTANCES);
>  
> -	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
> -	    inotify_max_user_instances) {
> +	if (!group->inotify_data.ucounts) {
>  		fsnotify_destroy_group(group);
>  		return ERR_PTR(-EMFILE);
>  	}
> @@ -819,8 +819,8 @@ static int __init inotify_user_setup(void)
>  	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
>  
>  	inotify_max_queued_events = 16384;
> -	inotify_max_user_instances = 128;
> -	inotify_max_user_watches = 8192;
> +	init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
> +	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
>  
>  	return 0;
>  }
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index 58205f33af02..892959de1162 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -16,6 +16,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/types.h>
>  #include <linux/atomic.h>
> +#include <linux/user_namespace.h>
>  
>  /*
>   * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
> @@ -169,7 +170,7 @@ struct fsnotify_group {
>  		struct inotify_group_private_data {
>  			spinlock_t	idr_lock;
>  			struct idr      idr;
> -			struct user_struct      *user;
> +			struct ucounts *ucounts;
>  		} inotify_data;
>  #endif
>  #ifdef CONFIG_FANOTIFY
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 62c68e513e39..35230a2c73ac 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -840,10 +840,6 @@ struct user_struct {
>  	atomic_t __count;	/* reference count */
>  	atomic_t processes;	/* How many processes does this user have? */
>  	atomic_t sigpending;	/* How many pending signals does this user have? */
> -#ifdef CONFIG_INOTIFY_USER
> -	atomic_t inotify_watches; /* How many inotify watches does this user have? */
> -	atomic_t inotify_devs;	/* How many inotify devs does this user have opened? */
> -#endif
>  #ifdef CONFIG_FANOTIFY
>  	atomic_t fanotify_listeners;
>  #endif
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index eb209d4523f5..cbcac7a5ec41 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -32,6 +32,10 @@ enum ucount_type {
>  	UCOUNT_NET_NAMESPACES,
>  	UCOUNT_MNT_NAMESPACES,
>  	UCOUNT_CGROUP_NAMESPACES,
> +#ifdef CONFIG_INOTIFY_USER
> +	UCOUNT_INOTIFY_INSTANCES,
> +	UCOUNT_INOTIFY_WATCHES,
> +#endif
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/ucount.c b/kernel/ucount.c
> index 9d20d5dd298a..a6bcea47306b 100644
> --- a/kernel/ucount.c
> +++ b/kernel/ucount.c
> @@ -57,7 +57,7 @@ static struct ctl_table_root set_root = {
>  
>  static int zero = 0;
>  static int int_max = INT_MAX;
> -#define UCOUNT_ENTRY(name) 				\
> +#define UCOUNT_ENTRY(name)				\
>  	{						\
>  		.procname	= name,			\
>  		.maxlen		= sizeof(int),		\
> @@ -74,6 +74,10 @@ static struct ctl_table user_table[] = {
>  	UCOUNT_ENTRY("max_net_namespaces"),
>  	UCOUNT_ENTRY("max_mnt_namespaces"),
>  	UCOUNT_ENTRY("max_cgroup_namespaces"),
> +#ifdef CONFIG_INOTIFY_USER
> +	UCOUNT_ENTRY("max_inotify_instances"),
> +	UCOUNT_ENTRY("max_inotify_watches"),
> +#endif
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.5.0
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR
_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/containers



[Index of Archives]     [Cgroups]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux