On Tue, 2024-10-29 at 16:12 -0700, Song Liu wrote: > fanotify fastpath handler enables handling fanotify events within the > kernel, and thus saves a trip to the user space. fanotify fastpath handler > can be useful in many use cases. For example, if a user is only interested > in events for some files in side a directory, a fastpath handler can be > used to filter out irrelevant events. > > fanotify fastpath handler is attached to fsnotify_group. At most one > fastpath handler can be attached to a fsnotify_group. The attach/detach > of fastpath handlers are controlled by two new ioctls on the fanotify fds: > FAN_IOC_ADD_FP and FAN_IOC_DEL_FP. > > fanotify fastpath handler is packaged in a kernel module. In the future, > it is also possible to package fastpath handler in a BPF program. Since > loading modules requires CAP_SYS_ADMIN, _loading_ fanotify fastpath > handler in kernel modules is limited to CAP_SYS_ADMIN. However, > non-SYS_CAP_ADMIN users can _attach_ fastpath handler loaded by sys admin > to their fanotify fds. To make fanotify fastpath handler more useful > for non-CAP_SYS_ADMIN users, a fastpath handler can take arguments at > attach time. > > TODO: Add some mechanism to help users discover available fastpath > handlers. For example, we can add a sysctl which is similar to > net.ipv4.tcp_available_congestion_control, or we can add some sysfs > entries. > > Signed-off-by: Song Liu <song@xxxxxxxxxx> > --- > fs/notify/fanotify/Makefile | 2 +- > fs/notify/fanotify/fanotify.c | 25 ++++ > fs/notify/fanotify/fanotify_fastpath.c | 171 +++++++++++++++++++++++++ > fs/notify/fanotify/fanotify_user.c | 7 + > include/linux/fanotify.h | 45 +++++++ > include/linux/fsnotify_backend.h | 3 + > include/uapi/linux/fanotify.h | 26 ++++ > 7 files changed, 278 insertions(+), 1 deletion(-) > create mode 100644 fs/notify/fanotify/fanotify_fastpath.c > > diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile > index 25ef222915e5..fddab88dde37 100644 > --- a/fs/notify/fanotify/Makefile > +++ b/fs/notify/fanotify/Makefile > @@ -1,2 +1,2 @@ > # SPDX-License-Identifier: GPL-2.0-only > -obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o > +obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o fanotify_fastpath.o This should probably be a compile-time option. Some people might be spooked by this and we'd want a way to disable it if it was found to cause a big issue later. > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > index 224bccaab4cc..a40ec06d0218 100644 > --- a/fs/notify/fanotify/fanotify.c > +++ b/fs/notify/fanotify/fanotify.c > @@ -18,6 +18,8 @@ > > #include "fanotify.h" > > +extern struct srcu_struct fsnotify_mark_srcu; > + > static bool fanotify_path_equal(const struct path *p1, const struct path *p2) > { > return p1->mnt == p2->mnt && p1->dentry == p2->dentry; > @@ -888,6 +890,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, > struct fsnotify_event *fsn_event; > __kernel_fsid_t fsid = {}; > u32 match_mask = 0; > + struct fanotify_fastpath_hook *fp_hook; > > BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); > BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); > @@ -933,6 +936,25 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, > if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) > fsid = fanotify_get_fsid(iter_info); > > + fp_hook = srcu_dereference(group->fanotify_data.fp_hook, &fsnotify_mark_srcu); > + if (fp_hook) { > + struct fanotify_fastpath_event fp_event = { > + .mask = mask, > + .data = data, > + .data_type = data_type, > + .dir = dir, > + .file_name = file_name, > + .fsid = &fsid, > + .match_mask = match_mask, > + }; > + > + ret = fp_hook->ops->fp_handler(group, fp_hook, &fp_event); > + if (ret == FAN_FP_RET_SKIP_EVENT) { > + ret = 0; > + goto finish; > + } > + } > + > event = fanotify_alloc_event(group, mask, data, data_type, dir, > file_name, &fsid, match_mask); > ret = -ENOMEM; > @@ -976,6 +998,9 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) > > if (mempool_initialized(&group->fanotify_data.error_events_pool)) > mempool_exit(&group->fanotify_data.error_events_pool); > + > + if (group->fanotify_data.fp_hook) > + fanotify_fastpath_hook_free(group->fanotify_data.fp_hook); > } > > static void fanotify_free_path_event(struct fanotify_event *event) > diff --git a/fs/notify/fanotify/fanotify_fastpath.c b/fs/notify/fanotify/fanotify_fastpath.c > new file mode 100644 > index 000000000000..0453a1ac25b1 > --- /dev/null > +++ b/fs/notify/fanotify/fanotify_fastpath.c > @@ -0,0 +1,171 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <linux/fanotify.h> > +#include <linux/module.h> > + > +#include "fanotify.h" > + > +extern struct srcu_struct fsnotify_mark_srcu; > + > +static DEFINE_SPINLOCK(fp_list_lock); > +static LIST_HEAD(fp_list); > + > +static struct fanotify_fastpath_ops *fanotify_fastpath_find(const char *name) > +{ > + struct fanotify_fastpath_ops *ops; > + > + list_for_each_entry(ops, &fp_list, list) { > + if (!strcmp(ops->name, name)) > + return ops; > + } > + return NULL; > +} > + > + > +/* > + * fanotify_fastpath_register - Register a new fastpath handler. > + * > + * Add a fastpath handler to the fp_list. These fastpath handlers are > + * available for all users in the system. > + * > + * @ops: pointer to fanotify_fastpath_ops to add. > + * > + * Returns: > + * 0 - on success; > + * -EEXIST - fastpath handler of the same name already exists. > + */ > +int fanotify_fastpath_register(struct fanotify_fastpath_ops *ops) > +{ > + spin_lock(&fp_list_lock); > + if (fanotify_fastpath_find(ops->name)) { > + /* cannot register two handlers with the same name */ > + spin_unlock(&fp_list_lock); > + return -EEXIST; > + } > + list_add_tail(&ops->list, &fp_list); > + spin_unlock(&fp_list_lock); > + return 0; > +} > +EXPORT_SYMBOL_GPL(fanotify_fastpath_register); > + > +/* > + * fanotify_fastpath_unregister - Unregister a new fastpath handler. > + * > + * Remove a fastpath handler from fp_list. > + * > + * @ops: pointer to fanotify_fastpath_ops to remove. > + */ > +void fanotify_fastpath_unregister(struct fanotify_fastpath_ops *ops) > +{ > + spin_lock(&fp_list_lock); > + list_del_init(&ops->list); > + spin_unlock(&fp_list_lock); > +} > +EXPORT_SYMBOL_GPL(fanotify_fastpath_unregister); > + > +/* > + * fanotify_fastpath_add - Add a fastpath handler to fsnotify_group. > + * > + * Add a fastpath handler from fp_list to a fsnotify_group. > + * > + * @group: fsnotify_group that will have add > + * @argp: fanotify_fastpath_args that specifies the fastpath handler > + * and the init arguments of the fastpath handler. > + * > + * Returns: > + * 0 - on success; > + * -EEXIST - fastpath handler of the same name already exists. > + */ > +int fanotify_fastpath_add(struct fsnotify_group *group, > + struct fanotify_fastpath_args __user *argp) > +{ > + struct fanotify_fastpath_hook *fp_hook; > + struct fanotify_fastpath_ops *fp_ops; > + struct fanotify_fastpath_args args; > + int ret = 0; > + > + ret = copy_from_user(&args, argp, sizeof(args)); > + if (ret) > + return -EFAULT; > + > + if (args.version != 1 || args.flags || args.init_args_len > FAN_FP_ARGS_MAX) > + return -EINVAL; > + > + args.name[FAN_FP_NAME_MAX - 1] = '\0'; > + > + fsnotify_group_lock(group); > + > + if (rcu_access_pointer(group->fanotify_data.fp_hook)) { > + fsnotify_group_unlock(group); > + return -EBUSY; > + } > + > + fp_hook = kzalloc(sizeof(*fp_hook), GFP_KERNEL); > + if (!fp_hook) { > + ret = -ENOMEM; > + goto out; > + } > + > + spin_lock(&fp_list_lock); > + fp_ops = fanotify_fastpath_find(args.name); > + if (!fp_ops || !try_module_get(fp_ops->owner)) { > + spin_unlock(&fp_list_lock); > + ret = -ENOENT; > + goto err_free_hook; > + } > + spin_unlock(&fp_list_lock); > + > + if (fp_ops->fp_init) { > + char *init_args = NULL; > + > + if (args.init_args_len) { > + init_args = strndup_user(u64_to_user_ptr(args.init_args), > + args.init_args_len); > + if (IS_ERR(init_args)) { > + ret = PTR_ERR(init_args); > + if (ret == -EINVAL) > + ret = -E2BIG; > + goto err_module_put; > + } > + } > + ret = fp_ops->fp_init(fp_hook, init_args); > + kfree(init_args); > + if (ret) > + goto err_module_put; > + } > + fp_hook->ops = fp_ops; > + rcu_assign_pointer(group->fanotify_data.fp_hook, fp_hook); > + > +out: > + fsnotify_group_unlock(group); > + return ret; > + > +err_module_put: > + module_put(fp_ops->owner); > +err_free_hook: > + kfree(fp_hook); > + goto out; > +} > + > +void fanotify_fastpath_hook_free(struct fanotify_fastpath_hook *fp_hook) > +{ > + if (fp_hook->ops->fp_free) > + fp_hook->ops->fp_free(fp_hook); > + > + module_put(fp_hook->ops->owner); > +} > + > +void fanotify_fastpath_del(struct fsnotify_group *group) > +{ > + struct fanotify_fastpath_hook *fp_hook; > + > + fsnotify_group_lock(group); > + fp_hook = group->fanotify_data.fp_hook; > + if (!fp_hook) > + goto out; > + > + rcu_assign_pointer(group->fanotify_data.fp_hook, NULL); > + fanotify_fastpath_hook_free(fp_hook); > + > +out: > + fsnotify_group_unlock(group); > +} > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index 8e2d43fc6f7c..e96cb83f8409 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -987,6 +987,13 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar > spin_unlock(&group->notification_lock); > ret = put_user(send_len, (int __user *) p); > break; > + case FAN_IOC_ADD_FP: > + ret = fanotify_fastpath_add(group, p); > + break; > + case FAN_IOC_DEL_FP: > + fanotify_fastpath_del(group); > + ret = 0; > + break; > } > > return ret; > diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h > index 89ff45bd6f01..cea95307a580 100644 > --- a/include/linux/fanotify.h > +++ b/include/linux/fanotify.h > @@ -136,4 +136,49 @@ > #undef FAN_ALL_PERM_EVENTS > #undef FAN_ALL_OUTGOING_EVENTS > > +struct fsnotify_group; > +struct qstr; > +struct inode; > +struct fanotify_fastpath_hook; > + This would be a nice place for a doc header that describes what all of these fields represent. Most of it should probably be the same as the comment header over the struct fsnotify_ops definition: > +struct fanotify_fastpath_event { > + u32 mask; > + const void *data; > + int data_type; > + struct inode *dir; > + const struct qstr *file_name; > + __kernel_fsid_t *fsid; > + u32 match_mask; > +}; > + Ditto here. This should have a kerneldoc comment that describes the purpose, arguments and the "contract" around these new operations. > +struct fanotify_fastpath_ops { > + int (*fp_handler)(struct fsnotify_group *group, > + struct fanotify_fastpath_hook *fp_hook, > + struct fanotify_fastpath_event *fp_event); > + int (*fp_init)(struct fanotify_fastpath_hook *hook, const char *args); > + void (*fp_free)(struct fanotify_fastpath_hook *hook); > + > + char name[FAN_FP_NAME_MAX]; > + struct module *owner; > + struct list_head list; > + int flags; > +}; > + > +enum fanotify_fastpath_return { > + FAN_FP_RET_SEND_TO_USERSPACE = 0, > + FAN_FP_RET_SKIP_EVENT = 1, > +}; > + > +struct fanotify_fastpath_hook { > + struct fanotify_fastpath_ops *ops; > + void *data; > +}; > + > +int fanotify_fastpath_register(struct fanotify_fastpath_ops *ops); > +void fanotify_fastpath_unregister(struct fanotify_fastpath_ops *ops); > +int fanotify_fastpath_add(struct fsnotify_group *group, > + struct fanotify_fastpath_args __user *args); > +void fanotify_fastpath_del(struct fsnotify_group *group); > +void fanotify_fastpath_hook_free(struct fanotify_fastpath_hook *fp_hook); > + > #endif /* _LINUX_FANOTIFY_H */ > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > index 3ecf7768e577..ef251b4e4e6f 100644 > --- a/include/linux/fsnotify_backend.h > +++ b/include/linux/fsnotify_backend.h > @@ -117,6 +117,7 @@ struct fsnotify_fname; > struct fsnotify_iter_info; > > struct mem_cgroup; > +struct fanotify_fastpath_hook; > > /* > * Each group much define these ops. The fsnotify infrastructure will call > @@ -255,6 +256,8 @@ struct fsnotify_group { > int f_flags; /* event_f_flags from fanotify_init() */ > struct ucounts *ucounts; > mempool_t error_events_pool; > + > + struct fanotify_fastpath_hook __rcu *fp_hook; > } fanotify_data; > #endif /* CONFIG_FANOTIFY */ > }; > diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h > index 34f221d3a1b9..9c30baeebae0 100644 > --- a/include/uapi/linux/fanotify.h > +++ b/include/uapi/linux/fanotify.h > @@ -3,6 +3,7 @@ > #define _UAPI_LINUX_FANOTIFY_H > > #include <linux/types.h> > +#include <linux/ioctl.h> > > /* the following events that user-space can register for */ > #define FAN_ACCESS 0x00000001 /* File was accessed */ > @@ -243,4 +244,29 @@ struct fanotify_response_info_audit_rule { > (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ > (long)(meta)->event_len <= (long)(len)) > > +#define FAN_FP_NAME_MAX 64 > +#define FAN_FP_ARGS_MAX 1024 > + > +/* This is the arguments used to add fastpath handler to a group. */ > +struct fanotify_fastpath_args { > + /* user space pointer to the name of fastpath handler */ > + char name[FAN_FP_NAME_MAX]; > + > + __u32 version; > + __u32 flags; > + > + /* > + * user space pointer to the init args of fastpath handler, > + * up to init_args_len (<= FAN_FP_ARGS_MAX). > + */ > + __u64 init_args; > + /* length of init_args */ > + __u32 init_args_len; > +} __attribute__((__packed__)); > + > +#define FAN_IOC_MAGIC 'F' > + > +#define FAN_IOC_ADD_FP _IOW(FAN_IOC_MAGIC, 0, struct fanotify_fastpath_args) > +#define FAN_IOC_DEL_FP _IOW(FAN_IOC_MAGIC, 1, char[FAN_FP_NAME_MAX]) > + > #endif /* _UAPI_LINUX_FANOTIFY_H */ Otherwise, this looks fairly straightforward. Nice work, Song! -- Jeff Layton <jlayton@xxxxxxxxxx>