On Sun, Dec 20, 2020 at 6:46 AM Shakeel Butt <shakeelb@xxxxxxxxxx> wrote: > > Currently the fs sysctl inotify/max_user_instances is used to limit the > number of inotify instances on the system. For systems running multiple > workloads, the per-user namespace sysctl max_inotify_instances can be > used to further partition inotify instances. However there is no easy > way to set a sensible system level max limit on inotify instances and > further partition it between the workloads. It is much easier to charge > the underlying resource (i.e. memory) behind the inotify instances to > the memcg of the workload and let their memory limits limit the number > of inotify instances they can create. > > With inotify instances charged to memcg, the admin can simply set > max_user_instances to INT_MAX and let the memcg limits of the jobs limit > their inotify instances. > > Signed-off-by: Shakeel Butt <shakeelb@xxxxxxxxxx> Reviewed-by: Amir Goldstein <amir73il@xxxxxxxxx> > --- > Changes since v1: > - introduce fsnotify_alloc_user_group() and convert fanotify in addition > to inotify to use that function. [suggested by Amir] > > fs/notify/fanotify/fanotify_user.c | 2 +- > fs/notify/group.c | 25 ++++++++++++++++++++----- > fs/notify/inotify/inotify_user.c | 4 ++-- > include/linux/fsnotify_backend.h | 1 + > 4 files changed, 24 insertions(+), 8 deletions(-) > > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index 3e01d8f2ab90..7e7afc2b62e1 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -976,7 +976,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > f_flags |= O_NONBLOCK; > > /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ > - group = fsnotify_alloc_group(&fanotify_fsnotify_ops); > + group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops); > if (IS_ERR(group)) { > free_uid(user); > return PTR_ERR(group); > diff --git a/fs/notify/group.c b/fs/notify/group.c > index a4a4b1c64d32..ffd723ffe46d 100644 > --- a/fs/notify/group.c > +++ b/fs/notify/group.c > @@ -111,14 +111,12 @@ void fsnotify_put_group(struct fsnotify_group *group) > } > EXPORT_SYMBOL_GPL(fsnotify_put_group); > > -/* > - * Create a new fsnotify_group and hold a reference for the group returned. > - */ > -struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) > +static struct fsnotify_group *__fsnotify_alloc_group( > + const struct fsnotify_ops *ops, gfp_t gfp) > { > struct fsnotify_group *group; > > - group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL); > + group = kzalloc(sizeof(struct fsnotify_group), gfp); > if (!group) > return ERR_PTR(-ENOMEM); > > @@ -139,8 +137,25 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) > > return group; > } > + > +/* > + * Create a new fsnotify_group and hold a reference for the group returned. > + */ > +struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) > +{ > + return __fsnotify_alloc_group(ops, GFP_KERNEL); > +} > EXPORT_SYMBOL_GPL(fsnotify_alloc_group); > > +/* > + * Create a new fsnotify_group and hold a reference for the group returned. > + */ > +struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops) > +{ > + return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT); > +} > +EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group); > + > int fsnotify_fasync(int fd, struct file *file, int on) > { > struct fsnotify_group *group = file->private_data; > diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c > index 59c177011a0f..266d17e8ecb9 100644 > --- a/fs/notify/inotify/inotify_user.c > +++ b/fs/notify/inotify/inotify_user.c > @@ -632,11 +632,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) > struct fsnotify_group *group; > struct inotify_event_info *oevent; > > - group = fsnotify_alloc_group(&inotify_fsnotify_ops); > + group = fsnotify_alloc_user_group(&inotify_fsnotify_ops); > if (IS_ERR(group)) > return group; > > - oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); > + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT); > if (unlikely(!oevent)) { > fsnotify_destroy_group(group); > return ERR_PTR(-ENOMEM); > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > index a2e42d3cd87c..e5409b83e731 100644 > --- a/include/linux/fsnotify_backend.h > +++ b/include/linux/fsnotify_backend.h > @@ -470,6 +470,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry) > > /* create a new group */ > extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); > +extern struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops); > /* get reference to a group */ > extern void fsnotify_get_group(struct fsnotify_group *group); > /* drop reference on a group from fsnotify_alloc_group */ > -- > 2.29.2.684.gfbc64c5ab5-goog >