On Thu 10-01-19 19:04:37, Amir Goldstein wrote: > For FAN_REPORT_FID, we need to encode fid with fsid of the filesystem on > every event. To avoid having to call vfs_statfs() on every event to get > fsid, we store the fsid in fsnotify_mark_connector on the first time we > add a mark and on handle event we use the cached fsid. > > Subsequent calls to add mark on the same object are expected to pass the > same fsid, so the call will fail on cached fsid mismatch. > > If an event is reported on several mark types (inode, mount, filesystem), > all connectors should already have the same fsid, so we use the cached > fsid from the first connector. > > Suggested-by: Jan Kara <jack@xxxxxxx> > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> I've somewhat modified the patch to simplify code flow around fanotify_get_fsid() and also made the fsid argument of fsnotify_add_mark_locked() mandatory. The resulting patch is attached. Honza > --- > fs/notify/fanotify/fanotify.c | 51 +++++++++++++++++------- > fs/notify/fanotify/fanotify.h | 5 ++- > fs/notify/fanotify/fanotify_user.c | 62 ++++++++++++++++++------------ > fs/notify/mark.c | 47 +++++++++++++++++----- > include/linux/fsnotify_backend.h | 24 +++++++++--- > 5 files changed, 135 insertions(+), 54 deletions(-) > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > index e431f63c9f58..1bbf8b90dd83 100644 > --- a/fs/notify/fanotify/fanotify.c > +++ b/fs/notify/fanotify/fanotify.c > @@ -153,14 +153,16 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, > } > > static int fanotify_encode_fid(struct fanotify_event *event, > - const struct path *path, gfp_t gfp) > + const struct path *path, gfp_t gfp, > + __kernel_fsid_t *fsid) > { > struct fanotify_fid *fid = &event->fid; > int dwords, bytes = 0; > - struct kstatfs stat; > int err, type; > > - stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0; > + if (!fsid) > + goto out_err; > + > fid->ext_fh = NULL; > dwords = 0; > err = -ENOENT; > @@ 
-168,10 +170,6 @@ static int fanotify_encode_fid(struct fanotify_event *event, > if (!dwords) > goto out_err; > > - err = vfs_statfs(path, &stat); > - if (err) > - goto out_err; > - > bytes = dwords << 2; > if (bytes > FANOTIFY_INLINE_FH_LEN) { > /* Treat failure to allocate fh as failure to allocate event */ > @@ -187,14 +185,14 @@ static int fanotify_encode_fid(struct fanotify_event *event, > if (!type || type == FILEID_INVALID || bytes != dwords << 2) > goto out_err; > > - fid->fsid = stat.f_fsid; > + fid->fsid = *fsid; > event->fh_len = bytes; > > return type; > > out_err: > pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, type=%d, bytes=%d, err=%i)\n", > - stat.f_fsid.val[0], stat.f_fsid.val[1], > + fsid ? fsid->val[0] : 0, fsid ? fsid->val[1] : 0, > type, bytes, err); > kfree(fid->ext_fh); > fid->ext_fh = NULL; > @@ -204,8 +202,9 @@ static int fanotify_encode_fid(struct fanotify_event *event, > } > > struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > - struct inode *inode, u32 mask, > - const struct path *path) > + struct inode *inode, u32 mask, > + const struct path *path, > + __kernel_fsid_t *fsid) > { > struct fanotify_event *event = NULL; > gfp_t gfp = GFP_KERNEL_ACCOUNT; > @@ -244,7 +243,7 @@ init: __maybe_unused > event->fh_len = 0; > if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { > /* Report the event without a file identifier on encode error */ > - event->fh_type = fanotify_encode_fid(event, path, gfp); > + event->fh_type = fanotify_encode_fid(event, path, gfp, fsid); > } else if (path) { > event->fh_type = FILEID_ROOT; > event->path = *path; > @@ -259,6 +258,28 @@ init: __maybe_unused > return event; > } > > +/* > + * Get cached fsid of the filesystem containing the object from any connector. > + * All connectors are supposed to have the same fsid, but we do not verify that > + * here. 
> + */ > +static __kernel_fsid_t *fanotify_get_fsid(struct fsnotify_iter_info *iter_info, > + __kernel_fsid_t *fsid) > +{ > + int type; > + > + fsnotify_foreach_obj_type(type) { > + if (!fsnotify_iter_should_report_type(iter_info, type)) > + continue; > + > + *fsid = iter_info->marks[type]->connector->fsid; > + if (!WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1])) > + return fsid; > + } > + > + return NULL; > +} > + > static int fanotify_handle_event(struct fsnotify_group *group, > struct inode *inode, > u32 mask, const void *data, int data_type, > @@ -268,6 +289,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, > int ret = 0; > struct fanotify_event *event; > struct fsnotify_event *fsn_event; > + __kernel_fsid_t __fsid, *fsid = NULL; > > BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); > BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); > @@ -300,7 +322,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, > return 0; > } > > - event = fanotify_alloc_event(group, inode, mask, data); > + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) > + fsid = fanotify_get_fsid(iter_info, &__fsid); > + > + event = fanotify_alloc_event(group, inode, mask, data, fsid); > ret = -ENOMEM; > if (unlikely(!event)) { > /* > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > index 4aafc7144c3d..5b072afa4e19 100644 > --- a/fs/notify/fanotify/fanotify.h > +++ b/fs/notify/fanotify/fanotify.h > @@ -131,5 +131,6 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) > } > > struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, > - struct inode *inode, u32 mask, > - const struct path *path); > + struct inode *inode, u32 mask, > + const struct path *path, > + __kernel_fsid_t *fsid); > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index 211ec6332d31..467e6431fbe9 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -653,7 +653,8 @@ static 
__u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, > > static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > fsnotify_connp_t *connp, > - unsigned int type) > + unsigned int type, > + __kernel_fsid_t *fsid) > { > struct fsnotify_mark *mark; > int ret; > @@ -666,7 +667,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > return ERR_PTR(-ENOMEM); > > fsnotify_init_mark(mark, group); > - ret = fsnotify_add_mark_locked(mark, connp, type, 0); > + ret = fsnotify_add_mark_locked_fsid(mark, connp, type, 0, fsid); > if (ret) { > fsnotify_put_mark(mark); > return ERR_PTR(ret); > @@ -678,7 +679,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > > static int fanotify_add_mark(struct fsnotify_group *group, > fsnotify_connp_t *connp, unsigned int type, > - __u32 mask, unsigned int flags) > + __u32 mask, unsigned int flags, > + __kernel_fsid_t *fsid) > { > struct fsnotify_mark *fsn_mark; > __u32 added; > @@ -686,7 +688,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, > mutex_lock(&group->mark_mutex); > fsn_mark = fsnotify_find_mark(connp, group); > if (!fsn_mark) { > - fsn_mark = fanotify_add_new_mark(group, connp, type); > + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); > if (IS_ERR(fsn_mark)) { > mutex_unlock(&group->mark_mutex); > return PTR_ERR(fsn_mark); > @@ -703,23 +705,23 @@ static int fanotify_add_mark(struct fsnotify_group *group, > > static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, > struct vfsmount *mnt, __u32 mask, > - unsigned int flags) > + unsigned int flags, __kernel_fsid_t *fsid) > { > return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, > - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); > + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); > } > > static int fanotify_add_sb_mark(struct fsnotify_group *group, > - struct super_block *sb, __u32 mask, > - unsigned int flags) > + struct 
super_block *sb, __u32 mask, > + unsigned int flags, __kernel_fsid_t *fsid) > { > return fanotify_add_mark(group, &sb->s_fsnotify_marks, > - FSNOTIFY_OBJ_TYPE_SB, mask, flags); > + FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); > } > > static int fanotify_add_inode_mark(struct fsnotify_group *group, > struct inode *inode, __u32 mask, > - unsigned int flags) > + unsigned int flags, __kernel_fsid_t *fsid) > { > pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); > > @@ -734,7 +736,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, > return 0; > > return fanotify_add_mark(group, &inode->i_fsnotify_marks, > - FSNOTIFY_OBJ_TYPE_INODE, mask, flags); > + FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); > } > > /* fanotify syscalls */ > @@ -798,7 +800,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > atomic_inc(&user->fanotify_listeners); > group->memcg = get_mem_cgroup_from_mm(current->mm); > > - oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); > + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, NULL); > if (unlikely(!oevent)) { > fd = -ENOMEM; > goto out_destroy_group; > @@ -861,9 +863,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > } > > /* Check if filesystem can encode a unique fid */ > -static int fanotify_test_fid(struct path *path) > +static int fanotify_test_fid(struct path *path, struct kstatfs *stat) > { > - struct kstatfs stat, root_stat; > + struct kstatfs root_stat; > struct path root = { > .mnt = path->mnt, > .dentry = path->dentry->d_sb->s_root, > @@ -873,11 +875,11 @@ static int fanotify_test_fid(struct path *path) > /* > * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). 
> */ > - err = vfs_statfs(path, &stat); > + err = vfs_statfs(path, stat); > if (err) > return err; > > - if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1]) > + if (!stat->f_fsid.val[0] && !stat->f_fsid.val[1]) > return -ENODEV; > > /* > @@ -888,8 +890,8 @@ static int fanotify_test_fid(struct path *path) > if (err) > return err; > > - if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] || > - root_stat.f_fsid.val[1] != stat.f_fsid.val[1]) > + if (root_stat.f_fsid.val[0] != stat->f_fsid.val[0] || > + root_stat.f_fsid.val[1] != stat->f_fsid.val[1]) > return -EXDEV; > > /* > @@ -914,6 +916,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > struct fsnotify_group *group; > struct fd f; > struct path path; > + struct kstatfs stat; > + __kernel_fsid_t *fsid = NULL; > u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; > unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; > int ret; > @@ -992,9 +996,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > goto fput_and_out; > > if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { > - ret = fanotify_test_fid(&path); > + ret = fanotify_test_fid(&path, &stat); > if (ret) > goto path_put_and_out; > + > + fsid = &stat.f_fsid; > } > > /* inode held in place by reference to path; group by fget on fd */ > @@ -1007,19 +1013,25 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { > case FAN_MARK_ADD: > if (mark_type == FAN_MARK_MOUNT) > - ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); > + ret = fanotify_add_vfsmount_mark(group, mnt, mask, > + flags, fsid); > else if (mark_type == FAN_MARK_FILESYSTEM) > - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags); > + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, > + flags, fsid); > else > - ret = fanotify_add_inode_mark(group, inode, mask, flags); > + ret = fanotify_add_inode_mark(group, inode, mask, > + flags, fsid); > break; 
> case FAN_MARK_REMOVE: > if (mark_type == FAN_MARK_MOUNT) > - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); > + ret = fanotify_remove_vfsmount_mark(group, mnt, mask, > + flags); > else if (mark_type == FAN_MARK_FILESYSTEM) > - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags); > + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, > + flags); > else > - ret = fanotify_remove_inode_mark(group, inode, mask, flags); > + ret = fanotify_remove_inode_mark(group, inode, mask, > + flags); > break; > default: > ret = -EINVAL; > diff --git a/fs/notify/mark.c b/fs/notify/mark.c > index d2dd16cb5989..3c5f39cc7fa3 100644 > --- a/fs/notify/mark.c > +++ b/fs/notify/mark.c > @@ -82,6 +82,7 @@ > #include <linux/slab.h> > #include <linux/spinlock.h> > #include <linux/srcu.h> > +#include <linux/ratelimit.h> > > #include <linux/atomic.h> > > @@ -481,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) > } > > static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, > - unsigned int type) > + unsigned int type, > + __kernel_fsid_t *fsid) > { > struct inode *inode = NULL; > struct fsnotify_mark_connector *conn; > @@ -493,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, > INIT_HLIST_HEAD(&conn->list); > conn->type = type; > conn->obj = connp; > + /* Cache fsid of filesystem containing the object */ > + if (fsid) > + conn->fsid = *fsid; > + else > + conn->fsid.val[0] = conn->fsid.val[1] = 0; > if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) > inode = igrab(fsnotify_conn_inode(conn)); > /* > @@ -544,7 +551,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( > */ > static int fsnotify_add_mark_list(struct fsnotify_mark *mark, > fsnotify_connp_t *connp, unsigned int type, > - int allow_dups) > + int allow_dups, __kernel_fsid_t *fsid) > { > struct fsnotify_mark *lmark, *last = NULL; > struct fsnotify_mark_connector *conn; > @@ -553,15 +560,36 @@ static int 
fsnotify_add_mark_list(struct fsnotify_mark *mark, > > if (WARN_ON(!fsnotify_valid_obj_type(type))) > return -EINVAL; > + > + /* Backend is expected to check for zero fsid (e.g. tmpfs) */ > + if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1])) > + return -ENODEV; > + > restart: > spin_lock(&mark->lock); > conn = fsnotify_grab_connector(connp); > if (!conn) { > spin_unlock(&mark->lock); > - err = fsnotify_attach_connector_to_object(connp, type); > + err = fsnotify_attach_connector_to_object(connp, type, fsid); > if (err) > return err; > goto restart; > + } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && > + (fsid->val[0] != conn->fsid.val[0] || > + fsid->val[1] != conn->fsid.val[1])) { > + /* > + * Backend is expected to check for non uniform fsid > + * (e.g. btrfs), but maybe we missed something? > + * Only allow setting conn->fsid once to non zero fsid. > + * inotify and non-fid fanotify groups do not set nor test > + * conn->fsid. > + */ > + pr_warn_ratelimited("%s: fsid mismatch on object of type %u: %x.%x != %x.%x\n", > + __func__, conn->type, > + fsid->val[0], fsid->val[1], > + conn->fsid.val[0], conn->fsid.val[1]); > + err = -EXDEV; > + goto out_err; > } > > /* is mark the first mark? */ > @@ -604,9 +632,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, > * These marks may be used for the fsnotify backend to determine which > * event types should be delivered to which group. 
> */ > -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > - fsnotify_connp_t *connp, unsigned int type, > - int allow_dups) > +int fsnotify_add_mark_locked_fsid(struct fsnotify_mark *mark, > + fsnotify_connp_t *connp, unsigned int type, > + int allow_dups, __kernel_fsid_t *fsid) > { > struct fsnotify_group *group = mark->group; > int ret = 0; > @@ -627,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > fsnotify_get_mark(mark); /* for g_list */ > spin_unlock(&mark->lock); > > - ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); > + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); > if (ret) > goto err; > > @@ -648,13 +676,14 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > } > > int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, > - unsigned int type, int allow_dups) > + unsigned int type, int allow_dups, __kernel_fsid_t *fsid) > { > int ret; > struct fsnotify_group *group = mark->group; > > mutex_lock(&group->mark_mutex); > - ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); > + ret = fsnotify_add_mark_locked_fsid(mark, connp, type, allow_dups, > + fsid); > mutex_unlock(&group->mark_mutex); > return ret; > } > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > index 1e4b88bd1443..b66c4199d629 100644 > --- a/include/linux/fsnotify_backend.h > +++ b/include/linux/fsnotify_backend.h > @@ -293,6 +293,7 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; > struct fsnotify_mark_connector { > spinlock_t lock; > unsigned int type; /* Type of object [lock] */ > + __kernel_fsid_t fsid; /* fsid of filesystem containing object */ > union { > /* Object pointer [lock] */ > fsnotify_connp_t *obj; > @@ -433,20 +434,32 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, > /* Find mark belonging to given group in the list of marks */ > extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, > struct 
fsnotify_group *group); > +/* Get cached fsid of filesystem containing object */ > +extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, > + __kernel_fsid_t *fsid); > /* attach the mark to the object */ > extern int fsnotify_add_mark(struct fsnotify_mark *mark, > fsnotify_connp_t *connp, unsigned int type, > - int allow_dups); > -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > - fsnotify_connp_t *connp, unsigned int type, > - int allow_dups); > + int allow_dups, __kernel_fsid_t *fsid); > +extern int fsnotify_add_mark_locked_fsid(struct fsnotify_mark *mark, > + fsnotify_connp_t *connp, > + unsigned int type, int allow_dups, > + __kernel_fsid_t *fsid); > +static inline int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > + fsnotify_connp_t *connp, > + unsigned int type, int allow_dups) > +{ > + return fsnotify_add_mark_locked_fsid(mark, connp, type, allow_dups, > + NULL); > +} > + > /* attach the mark to the inode */ > static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, > struct inode *inode, > int allow_dups) > { > return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, > - FSNOTIFY_OBJ_TYPE_INODE, allow_dups); > + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); > } > static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, > struct inode *inode, > @@ -455,6 +468,7 @@ static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, > return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, > FSNOTIFY_OBJ_TYPE_INODE, allow_dups); > } > + > /* given a group and a mark, flag mark to be freed when all references are dropped */ > extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, > struct fsnotify_group *group); > -- > 2.17.1 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR
>From 9d8b60cea74117acea8e5b5d8876bd0094f734b1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein <amir73il@xxxxxxxxx> Date: Thu, 10 Jan 2019 19:04:37 +0200 Subject: [PATCH] fanotify: cache fsid in fsnotify_mark_connector For FAN_REPORT_FID, we need to encode fid with fsid of the filesystem on every event. To avoid having to call vfs_statfs() on every event to get fsid, we store the fsid in fsnotify_mark_connector on the first time we add a mark and on handle event we use the cached fsid. Subsequent calls to add mark on the same object are expected to pass the same fsid, so the call will fail on cached fsid mismatch. If an event is reported on several mark types (inode, mount, filesystem), all connectors should already have the same fsid, so we use the cached fsid from the first connector. [JK: Simplify code flow around fanotify_get_fsid() and make fsid argument of fsnotify_add_mark_locked() unconditional] Suggested-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/notify/fanotify/fanotify.c | 50 ++++++++++++++++++++--------- fs/notify/fanotify/fanotify.h | 5 +-- fs/notify/fanotify/fanotify_user.c | 64 ++++++++++++++++++++++---------------- fs/notify/mark.c | 42 ++++++++++++++++++++----- include/linux/fsnotify_backend.h | 18 ++++++++--- 5 files changed, 125 insertions(+), 54 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index b19b049746a1..0815593562d7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -153,14 +153,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, } static int fanotify_encode_fid(struct fanotify_event *event, - const struct path *path, gfp_t gfp) + const struct path *path, gfp_t gfp, + __kernel_fsid_t *fsid) { struct fanotify_fid *fid = &event->fid; int dwords, bytes = 0; - struct kstatfs stat; int err, type; - stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0; fid->ext_fh = NULL; dwords 
= 0; err = -ENOENT; @@ -168,10 +167,6 @@ static int fanotify_encode_fid(struct fanotify_event *event, if (!dwords) goto out_err; - err = vfs_statfs(path, &stat); - if (err) - goto out_err; - bytes = dwords << 2; if (bytes > FANOTIFY_INLINE_FH_LEN) { /* Treat failure to allocate fh as failure to allocate event */ @@ -187,7 +182,7 @@ static int fanotify_encode_fid(struct fanotify_event *event, if (!type || type == FILEID_INVALID || bytes != dwords << 2) goto out_err; - fid->fsid = stat.f_fsid; + fid->fsid = *fsid; event->fh_len = bytes; return type; @@ -195,8 +190,7 @@ static int fanotify_encode_fid(struct fanotify_event *event, out_err: pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " "type=%d, bytes=%d, err=%i)\n", - stat.f_fsid.val[0], stat.f_fsid.val[1], - type, bytes, err); + fsid->val[0], fsid->val[1], type, bytes, err); kfree(fid->ext_fh); fid->ext_fh = NULL; event->fh_len = 0; @@ -205,8 +199,9 @@ static int fanotify_encode_fid(struct fanotify_event *event, } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path) + struct inode *inode, u32 mask, + const struct path *path, + __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; @@ -245,7 +240,7 @@ init: __maybe_unused event->fh_len = 0; if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { /* Report the event without a file identifier on encode error */ - event->fh_type = fanotify_encode_fid(event, path, gfp); + event->fh_type = fanotify_encode_fid(event, path, gfp, fsid); } else if (path) { event->fh_type = FILEID_ROOT; event->path = *path; @@ -260,6 +255,29 @@ init: __maybe_unused return event; } +/* + * Get cached fsid of the filesystem containing the object from any connector. + * All connectors are supposed to have the same fsid, but we do not verify that + * here. 
+ */ +static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) +{ + int type; + __kernel_fsid_t fsid = {}; + + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + + fsid = iter_info->marks[type]->connector->fsid; + if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) + continue; + return fsid; + } + + return fsid; +} + static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, @@ -269,6 +287,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, int ret = 0; struct fanotify_event *event; struct fsnotify_event *fsn_event; + __kernel_fsid_t fsid = {}; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); @@ -301,7 +320,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(group, inode, mask, data); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + fsid = fanotify_get_fsid(iter_info); + + event = fanotify_alloc_event(group, inode, mask, data, &fsid); ret = -ENOMEM; if (unlikely(!event)) { /* diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4aafc7144c3d..5b072afa4e19 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -131,5 +131,6 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path); + struct inode *inode, u32 mask, + const struct path *path, + __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a09bc7d1ab87..603419ce096f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -246,7 +246,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, { struct fanotify_event_metadata metadata; struct fanotify_event 
*event; - struct file *f; + struct file *f = NULL; int ret, fd = FAN_NOFD; pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); @@ -653,7 +653,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct fsnotify_mark *mark; int ret; @@ -666,7 +667,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, connp, type, 0); + ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -678,7 +679,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int type, - __u32 mask, unsigned int flags) + __u32 mask, unsigned int flags, + __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -686,7 +688,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, type); + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); if (IS_ERR(fsn_mark)) { mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); @@ -703,23 +705,23 @@ static int fanotify_add_mark(struct fsnotify_group *group, static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); } static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block 
*sb, __u32 mask, - unsigned int flags) + struct super_block *sb, __u32 mask, + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags); + FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); } static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -734,7 +736,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags); + FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); } /* fanotify syscalls */ @@ -798,7 +800,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); - oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; @@ -861,9 +863,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } /* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct path *path) +static int fanotify_test_fid(struct path *path, struct kstatfs *stat) { - struct kstatfs stat, root_stat; + struct kstatfs root_stat; struct path root = { .mnt = path->mnt, .dentry = path->dentry->d_sb->s_root, @@ -873,11 +875,11 @@ static int fanotify_test_fid(struct path *path) /* * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). 
*/ - err = vfs_statfs(path, &stat); + err = vfs_statfs(path, stat); if (err) return err; - if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1]) + if (!stat->f_fsid.val[0] && !stat->f_fsid.val[1]) return -ENODEV; /* @@ -888,8 +890,8 @@ static int fanotify_test_fid(struct path *path) if (err) return err; - if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] || - root_stat.f_fsid.val[1] != stat.f_fsid.val[1]) + if (root_stat.f_fsid.val[0] != stat->f_fsid.val[0] || + root_stat.f_fsid.val[1] != stat->f_fsid.val[1]) return -EXDEV; /* @@ -914,6 +916,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; + struct kstatfs stat; + __kernel_fsid_t *fsid = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; @@ -992,9 +996,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { - ret = fanotify_test_fid(&path); + ret = fanotify_test_fid(&path, &stat); if (ret) goto path_put_and_out; + + fsid = &stat.f_fsid; } /* inode held in place by reference to path; group by fget on fd */ @@ -1007,19 +1013,25 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); + ret = fanotify_add_vfsmount_mark(group, mnt, mask, + flags, fsid); else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags); + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, + flags, fsid); else - ret = fanotify_add_inode_mark(group, inode, mask, flags); + ret = fanotify_add_inode_mark(group, inode, mask, + flags, fsid); break; case FAN_MARK_REMOVE: if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); 
+ ret = fanotify_remove_vfsmount_mark(group, mnt, mask, + flags); else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags); + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, + flags); else - ret = fanotify_remove_inode_mark(group, inode, mask, flags); + ret = fanotify_remove_inode_mark(group, inode, mask, + flags); break; default: ret = -EINVAL; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d2dd16cb5989..d593d4269561 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -82,6 +82,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> +#include <linux/ratelimit.h> #include <linux/atomic.h> @@ -481,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct inode *inode = NULL; struct fsnotify_mark_connector *conn; @@ -493,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, INIT_HLIST_HEAD(&conn->list); conn->type = type; conn->obj = connp; + /* Cache fsid of filesystem containing the object */ + if (fsid) + conn->fsid = *fsid; + else + conn->fsid.val[0] = conn->fsid.val[1] = 0; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -544,7 +551,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; @@ -553,15 +560,36 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, if (WARN_ON(!fsnotify_valid_obj_type(type))) return -EINVAL; + + /* Backend is expected to check for zero fsid (e.g. 
tmpfs) */ + if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1])) + return -ENODEV; + restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, type); + err = fsnotify_attach_connector_to_object(connp, type, fsid); if (err) return err; goto restart; + } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + (fsid->val[0] != conn->fsid.val[0] || + fsid->val[1] != conn->fsid.val[1])) { + /* + * Backend is expected to check for non uniform fsid + * (e.g. btrfs), but maybe we missed something? + * Only allow setting conn->fsid once to non zero fsid. + * inotify and non-fid fanotify groups do not set nor test + * conn->fsid. + */ + pr_warn_ratelimited("%s: fsid mismatch on object of type %u: " + "%x.%x != %x.%x\n", __func__, conn->type, + fsid->val[0], fsid->val[1], + conn->fsid.val[0], conn->fsid.val[1]); + err = -EXDEV; + goto out_err; } /* is mark the first mark? */ @@ -606,7 +634,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; @@ -627,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); if (ret) goto err; @@ -648,13 +676,13 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups) + unsigned int type, int allow_dups, __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); + 
ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid); mutex_unlock(&group->mark_mutex); return ret; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1e4b88bd1443..7b93f15b4944 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -293,6 +293,7 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; struct fsnotify_mark_connector { spinlock_t lock; unsigned int type; /* Type of object [lock] */ + __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; @@ -433,28 +434,35 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); +/* Get cached fsid of filesystem containing object */ +extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, + __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups); + int allow_dups, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups); + fsnotify_connp_t *connp, + unsigned int type, int allow_dups, + __kernel_fsid_t *fsid); + /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups); + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups); + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, + 
NULL); } + /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); -- 2.16.4