For FAN_REPORT_FID, we need to encode fid with fsid of the filesystem on every event. To avoid having to call vfs_statfs() on every event to get fsid, we store the fsid in fsnotify_mark_connector on the first time we add a mark and on handle event we use the cached fsid. Subsequent calls to add mark on the same object are expected to pass the same fsid, so the call will fail on cached fsid mismatch. Similarly, if an event is reported on several mark types (inode, mount, filesystem), all connectors should already have the same fsid and we warn if we detect a mismatch. Suggested-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> --- fs/notify/fanotify/fanotify.c | 42 ++++++++++------ fs/notify/fanotify/fanotify.h | 5 +- fs/notify/fanotify/fanotify_user.c | 62 ++++++++++++++---------- fs/notify/mark.c | 77 ++++++++++++++++++++++++++---- include/linux/fsnotify_backend.h | 24 ++++++++-- 5 files changed, 154 insertions(+), 56 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 844b748f0b74..90bac98dd8c7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,7 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); - + return ret; } @@ -103,13 +103,13 @@ static int fanotify_get_response(struct fsnotify_group *group, * requested by the user, will not be present in the returned mask. */ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, - u32 event_mask, const void *data, - int data_type) + u32 event_mask, const void *data, + int data_type, __kernel_fsid_t *fsid) { __u32 marks_mask = 0, marks_ignored_mask = 0; const struct path *path = data; struct fsnotify_mark *mark; - int type; + int type, err; pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); @@ -136,6 +136,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, !(mark->mask & FS_EVENT_ON_CHILD))) continue; + if (fsid) { + err = fsnotify_get_conn_fsid(mark->connector, fsid); + /* Should we skip mark with null or conflicting fsid? */ + pr_debug("%s: fsid=%x.%x type=%u err=%d\n", __func__, + fsid->val[0], fsid->val[1], type, err); + } + marks_mask |= mark->mask; marks_ignored_mask |= mark->ignored_mask; } @@ -153,7 +160,6 @@ static struct fanotify_event_fid *fanotify_alloc_fid(const struct path *path, { struct fanotify_event_fid *fid = NULL; int dwords, bytes = 0; - struct kstatfs stat; int err, type; dwords = 0; @@ -162,10 +168,6 @@ static struct fanotify_event_fid *fanotify_alloc_fid(const struct path *path, if (!dwords) goto out_err; - err = vfs_statfs(path, &stat); - if (err) - goto out_err; - /* Treat failure to allocate fid as failure to allocate event */ bytes = dwords << 2; fid = kmalloc(FANOTIFY_FID_LEN(bytes), gfp); @@ -180,7 +182,6 @@ static struct fanotify_event_fid *fanotify_alloc_fid(const struct path *path, fid->handle_bytes = bytes; fid->handle_type = type; - fid->fsid = stat.f_fsid; return fid; @@ -192,8 +193,9 @@ static struct fanotify_event_fid *fanotify_alloc_fid(const struct path *path, } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path) + struct inode *inode, u32 mask, + const struct path *path, + __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; struct fanotify_event_fid *fid = NULL; @@ -210,7 +212,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, /* Whoever is interested in the event, pays for the allocation. */ memalloc_use_memcg(group->memcg); - if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + if (path && fsid && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { fid = fanotify_alloc_fid(path, gfp); /* Treat failure to allocate fid as failure to allocate event */ if (!fid) @@ -218,6 +220,8 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, /* Report the event without a file identifier on encode error */ if (IS_ERR(fid)) fid = NULL; + else + fid->fsid = *fsid; } if (fanotify_is_perm_event(mask)) { @@ -262,6 +266,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, int ret = 0; struct fanotify_event *event; struct fsnotify_event *fsn_event; + __kernel_fsid_t __fsid, *fsid = NULL; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); @@ -278,7 +283,14 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12); - mask = fanotify_group_event_mask(iter_info, mask, data, data_type); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + __fsid.val[0] = 0; + __fsid.val[1] = 0; + fsid = &__fsid; + } + + mask = fanotify_group_event_mask(iter_info, mask, data, data_type, + fsid); if (!mask) return 0; @@ -294,7 +306,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(group, inode, mask, data); + event = fanotify_alloc_event(group, inode, mask, data, fsid); ret = -ENOMEM; if (unlikely(!event)) { /* diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 0e57fa0674d7..b9938626b2a3 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -99,5 +99,6 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path); + struct inode *inode, u32 mask, + const struct path *path, + __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d7aa2f392a64..92afb297fdea 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -656,7 +656,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct fsnotify_mark *mark; int ret; @@ -669,7 +670,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, connp, type, 0); + ret = fsnotify_add_mark_locked_fsid(mark, connp, type, 0, fsid); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -681,7 +682,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int type, - __u32 mask, unsigned int flags) + __u32 mask, unsigned int flags, + __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -689,7 +691,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, type); + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); if (IS_ERR(fsn_mark)) { mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); @@ -706,23 +708,23 @@ static int fanotify_add_mark(struct fsnotify_group *group, static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); } static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags) + struct super_block *sb, __u32 mask, + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags); + FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); } static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -737,7 +739,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags); + FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); } /* fanotify syscalls */ @@ -797,7 +799,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); - oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; @@ -860,20 +862,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } /* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct path *path) +static int fanotify_test_fid(struct path *path, struct kstatfs *stat) { - struct kstatfs stat, root_stat; + struct kstatfs root_stat; int err; /* * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). * TODO: cache fsid in the mark connector. */ - err = vfs_statfs(path, &stat); + err = vfs_statfs(path, stat); if (err) return err; - if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1]) + if (!stat->f_fsid.val[0] && !stat->f_fsid.val[1]) return -ENODEV; /* @@ -884,8 +886,8 @@ static int fanotify_test_fid(struct path *path) if (err) return err; - if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] || - root_stat.f_fsid.val[1] != stat.f_fsid.val[1]) + if (root_stat.f_fsid.val[0] != stat->f_fsid.val[0] || + root_stat.f_fsid.val[1] != stat->f_fsid.val[1]) return -EXDEV; /* @@ -910,6 +912,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; + struct kstatfs stat; + __kernel_fsid_t *fsid = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; @@ -988,9 +992,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { - ret = fanotify_test_fid(&path); + ret = fanotify_test_fid(&path, &stat); if (ret) goto path_put_and_out; + + fsid = &stat.f_fsid; } /* inode held in place by reference to path; group by fget on fd */ @@ -1003,19 +1009,25 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); + ret = fanotify_add_vfsmount_mark(group, mnt, mask, + flags, fsid); else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags); + ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, + flags, fsid); else - ret = fanotify_add_inode_mark(group, inode, mask, flags); + ret = fanotify_add_inode_mark(group, inode, mask, + flags, fsid); break; case FAN_MARK_REMOVE: if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); + ret = fanotify_remove_vfsmount_mark(group, mnt, mask, + flags); else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags); + ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, + flags); else - ret = fanotify_remove_inode_mark(group, inode, mask, flags); + ret = fanotify_remove_inode_mark(group, inode, mask, + flags); break; default: ret = -EINVAL; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d2dd16cb5989..c7d8e487c348 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -82,6 +82,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> +#include <linux/ratelimit.h> #include <linux/atomic.h> @@ -480,8 +481,54 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } +/* + * Check consistent fsid for all marks on the same object. + * It is expected to always get the same fsid (or no fsid) for all + * marks added to object. + */ +static int fsnotify_test_conn_fsid(const struct fsnotify_mark_connector *conn, + const __kernel_fsid_t *fsid, + const char *where) + +{ + /* + * Backend is expected to check for non uniform fsid (e.g. btrfs), + * but maybe we missed something? + */ + if (fsid->val[0] != conn->fsid.val[0] || + fsid->val[1] != conn->fsid.val[1]) { + pr_warn_ratelimited("%s: fsid mismatch on object of type %u: %x.%x != %x.%x\n", + where, conn->type, + fsid->val[0], fsid->val[1], + conn->fsid.val[0], conn->fsid.val[1]); + return -EXDEV; + } + + return 0; +} + +/* + * Get cached fsid of filesystem containing object from connector. + */ +int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, + __kernel_fsid_t *fsid) +{ + if (WARN_ON(!conn->fsid.val[0] && !conn->fsid.val[1])) + return -ENODEV; + + if (!fsid->val[0] && !fsid->val[1]) { + *fsid = conn->fsid; + return 0; + } + + return fsnotify_test_conn_fsid(conn, fsid, __func__); +} + +static const __kernel_fsid_t null_fsid; + static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct inode *inode = NULL; struct fsnotify_mark_connector *conn; @@ -493,6 +540,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, INIT_HLIST_HEAD(&conn->list); conn->type = type; conn->obj = connp; + /* Cache fsid of filesystem containing the object */ + conn->fsid = fsid ? *fsid : null_fsid; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -544,7 +593,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups) + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; @@ -553,15 +602,24 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, if (WARN_ON(!fsnotify_valid_obj_type(type))) return -EINVAL; + + /* Backend is expected to check for zero fsid (e.g. tmpfs) */ + if (fsid && WARN_ON(!fsid->val[0] && !fsid->val[1])) + return -ENODEV; + restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, type); + err = fsnotify_attach_connector_to_object(connp, type, fsid); if (err) return err; goto restart; + } else if (fsid) { + err = fsnotify_test_conn_fsid(conn, fsid, __func__); + if (err) + goto out_err; } /* is mark the first mark? */ @@ -604,9 +662,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups) +int fsnotify_add_mark_locked_fsid(struct fsnotify_mark *mark, + fsnotify_connp_t *connp, unsigned int type, + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; @@ -627,7 +685,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); if (ret) goto err; @@ -648,13 +706,14 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups) + unsigned int type, int allow_dups, __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); + ret = fsnotify_add_mark_locked_fsid(mark, connp, type, allow_dups, + fsid); mutex_unlock(&group->mark_mutex); return ret; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c3cb692c48a0..d27106efd9b0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -294,6 +294,7 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; struct fsnotify_mark_connector { spinlock_t lock; unsigned int type; /* Type of object [lock] */ + __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; @@ -434,20 +435,32 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); +/* Get cached fsid of filesystem containing object */ +extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, + __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int type, - int allow_dups); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, - int allow_dups); + int allow_dups, __kernel_fsid_t *fsid); +extern int fsnotify_add_mark_locked_fsid(struct fsnotify_mark *mark, + fsnotify_connp_t *connp, + unsigned int type, int allow_dups, + __kernel_fsid_t *fsid); +static inline int fsnotify_add_mark_locked(struct fsnotify_mark *mark, + fsnotify_connp_t *connp, + unsigned int type, int allow_dups) +{ + return fsnotify_add_mark_locked_fsid(mark, connp, type, allow_dups, + NULL); +} + /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int allow_dups) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups); + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, @@ -456,6 +469,7 @@ static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, allow_dups); } + /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); -- 2.17.1