On Sat, Nov 18, 2023 at 8:30 PM Amir Goldstein <amir73il@xxxxxxxxx> wrote: > > So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark > on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or > non-uniform fsid (e.g. btrfs non-root subvol). > > When group is watching inodes all from the same filesystem (or subvol), > allow adding inode marks with "weak" fsid, because there is no ambiguity > regarding which filesystem reports the event. > > The first mark added to a group determines if this group is single or > multi filesystem, depending on the fsid at the path of the added mark. > > If the first mark added has a "strong" fsid, marks with "weak" fsid > cannot be added and vice versa. > > If the first mark added has a "weak" fsid, following marks must have > the same "weak" fsid and the same sb as the first mark. > > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > --- > fs/notify/fanotify/fanotify.c | 15 +---- > fs/notify/fanotify/fanotify.h | 6 ++ > fs/notify/fanotify/fanotify_user.c | 97 ++++++++++++++++++++++++------ > include/linux/fsnotify_backend.h | 1 + > 4 files changed, 90 insertions(+), 29 deletions(-) > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > index aff1ab3c32aa..1e4def21811e 100644 > --- a/fs/notify/fanotify/fanotify.c > +++ b/fs/notify/fanotify/fanotify.c > @@ -29,12 +29,6 @@ static unsigned int fanotify_hash_path(const struct path *path) > hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS); > } > > -static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, > - __kernel_fsid_t *fsid2) > -{ > - return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; > -} > - > static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) > { > return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ > @@ -851,7 +845,8 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) > if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID)) > continue; > fsid = FANOTIFY_MARK(mark)->fsid; > - if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) > + if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) && > + WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) > continue; > return fsid; > } > @@ -933,12 +928,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, > return 0; > } > > - if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) { > + if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) > fsid = fanotify_get_fsid(iter_info); > - /* Racing with mark destruction or creation? */ > - if (!fsid.val[0] && !fsid.val[1]) > - return 0; > - } > > event = fanotify_alloc_event(group, mask, data, data_type, dir, > file_name, &fsid, match_mask); > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > index 2847fa564298..9c97ae638ac5 100644 > --- a/fs/notify/fanotify/fanotify.h > +++ b/fs/notify/fanotify/fanotify.h > @@ -504,6 +504,12 @@ static inline __kernel_fsid_t *fanotify_mark_fsid(struct fsnotify_mark *mark) > return &FANOTIFY_MARK(mark)->fsid; > } > > +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, > + __kernel_fsid_t *fsid2) > +{ > + return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; > +} > + > static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) > { > unsigned int mflags = 0; > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index e3d836d4d156..1cc68ea56c17 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -23,7 +23,7 @@ > > #include <asm/ioctls.h> > > -#include "../../mount.h" > +#include "../fsnotify.h" > #include "../fdinfo.h" > #include "fanotify.h" > > @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, > return recalc; > } > > +struct fan_fsid { > + struct super_block *sb; > + __kernel_fsid_t id; > + bool weak; > +}; > + > +static int fanotify_check_mark_fsid(struct fsnotify_group *group, > + struct fsnotify_mark *mark, > + struct fan_fsid *fsid) > +{ > + struct fsnotify_mark_connector *conn; > + struct fsnotify_mark *old; > + struct super_block *old_sb = NULL; > + > + *fanotify_mark_fsid(mark) = fsid->id; > + mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; > + if (fsid->weak) > + mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID; > + > + /* First mark added will determine if group is single or multi fsid */ > + if (list_empty(&group->marks_list)) > + return 0; > + > + /* Find sb of an existing mark */ > + list_for_each_entry(old, &group->marks_list, g_list) { > + conn = READ_ONCE(old->connector); > + if (!conn) > + continue; > + old_sb = fsnotify_connector_sb(conn); > + if (old_sb) > + break; > + } > + > + /* Only detached marks left? */ > + if (!old_sb) > + return 0; > + > + /* Do not allow mixing of marks with weak and strong fsid */ > + if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID) > + return -EXDEV; > + > + /* Allow mixing of marks with strong fsid from different fs */ > + if (!fsid->weak) > + return 0; > + > + /* Do not allow mixing marks with weak fsid from different fs */ > + if (old_sb != fsid->sb) > + return -EXDEV; > + > + /* Do not allow mixing marks from different btrfs sub-volumes */ > + if (!fanotify_fsid_equal(fanotify_mark_fsid(old), > + fanotify_mark_fsid(mark))) > + return -EXDEV; > + > + return 0; > +} > + > static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > fsnotify_connp_t *connp, > unsigned int obj_type, > unsigned int fan_flags, > - __kernel_fsid_t *fsid) > + struct fan_fsid *fsid) > { > struct ucounts *ucounts = group->fanotify_data.ucounts; > struct fanotify_mark *fan_mark; > @@ -1225,8 +1282,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > > /* Cache fsid of filesystem containing the marked object */ > if (fsid) { > - fan_mark->fsid = *fsid; > - mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; > + ret = fanotify_check_mark_fsid(group, mark, fsid); > + if (ret) OOPS, missed fsnotify_put_mark(mark); better add a goto target out_put_mark as this is the second case now. > + goto out_dec_ucounts; > } else { > fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; > } > @@ -1289,7 +1347,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, > static int fanotify_add_mark(struct fsnotify_group *group, > fsnotify_connp_t *connp, unsigned int obj_type, > __u32 mask, unsigned int fan_flags, > - __kernel_fsid_t *fsid) > + struct fan_fsid *fsid) > { > struct fsnotify_mark *fsn_mark; > bool recalc; > @@ -1337,7 +1395,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, > > static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, > struct vfsmount *mnt, __u32 mask, > - unsigned int flags, __kernel_fsid_t *fsid) > + unsigned int flags, struct fan_fsid *fsid) > { > return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, > FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); > @@ -1345,7 +1403,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, > > static int fanotify_add_sb_mark(struct fsnotify_group *group, > struct super_block *sb, __u32 mask, > - unsigned int flags, __kernel_fsid_t *fsid) > + unsigned int flags, struct fan_fsid *fsid) > { > return fanotify_add_mark(group, &sb->s_fsnotify_marks, > FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); > @@ -1353,7 +1411,7 @@ static int fanotify_add_sb_mark(struct fsnotify_group *group, > > static int fanotify_add_inode_mark(struct fsnotify_group *group, > struct inode *inode, __u32 mask, > - unsigned int flags, __kernel_fsid_t *fsid) > + unsigned int flags, struct fan_fsid *fsid) > { > pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); > > @@ -1564,20 +1622,25 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > return fd; > } > > -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) > +static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags, > + struct fan_fsid *fsid) > { > + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; > __kernel_fsid_t root_fsid; > int err; > > /* > * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). > */ > - err = vfs_get_fsid(dentry, fsid); > + err = vfs_get_fsid(dentry, &fsid->id); > if (err) > return err; > > - if (!fsid->val[0] && !fsid->val[1]) > - return -ENODEV; > + /* Allow weak fsid when marking inodes */ > + fsid->sb = dentry->d_sb; > + fsid->weak = mark_type == FAN_MARK_INODE; > + if (!fsid->id.val[0] && !fsid->id.val[1]) > + return fsid->weak ? 0 : -ENODEV; > > /* > * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) > @@ -1587,10 +1650,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) > if (err) > return err; > > - if (root_fsid.val[0] != fsid->val[0] || > - root_fsid.val[1] != fsid->val[1]) > - return -EXDEV; > + if (!fanotify_fsid_equal(&root_fsid, &fsid->id)) > + return fsid->weak ? 0 : -EXDEV; > > + fsid->weak = false; > return 0; > } > > @@ -1675,7 +1738,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > struct fsnotify_group *group; > struct fd f; > struct path path; > - __kernel_fsid_t __fsid, *fsid = NULL; > + struct fan_fsid __fsid, *fsid = NULL; > u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; > unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; > unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; > @@ -1827,7 +1890,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > } > > if (fid_mode) { > - ret = fanotify_test_fsid(path.dentry, &__fsid); > + ret = fanotify_test_fsid(path.dentry, flags, &__fsid); > if (ret) > goto path_put_and_out; > > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > index a80b525ca653..7f63be5ca0f1 100644 > --- a/include/linux/fsnotify_backend.h > +++ b/include/linux/fsnotify_backend.h > @@ -529,6 +529,7 @@ struct fsnotify_mark { > #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 > #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 > #define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800 > +#define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000 > unsigned int flags; /* flags [mark->lock] */ > }; > > -- > 2.34.1 >