On Mon, Nov 20, 2023 at 9:42 AM Amir Goldstein <amir73il@xxxxxxxxx> wrote: > > On Sat, Nov 18, 2023 at 8:30 PM Amir Goldstein <amir73il@xxxxxxxxx> wrote: > > > > So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark > > on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or > > non-uniform fsid (e.g. btrfs non-root subvol). > > > > When group is watching inodes all from the same filesystem (or subvol), > > allow adding inode marks with "weak" fsid, because there is no ambiguity > > regarding which filesystem reports the event. > > > > The first mark added to a group determines if this group is single or > > multi filesystem, depending on the fsid at the path of the added mark. > > > > If the first mark added has a "strong" fsid, marks with "weak" fsid > > cannot be added and vice versa. > > > > If the first mark added has a "weak" fsid, following marks must have > > the same "weak" fsid and the same sb as the first mark. > > > > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > > --- > > fs/notify/fanotify/fanotify.c | 15 +---- > > fs/notify/fanotify/fanotify.h | 6 ++ > > fs/notify/fanotify/fanotify_user.c | 97 ++++++++++++++++++++++++------ > > include/linux/fsnotify_backend.h | 1 + > > 4 files changed, 90 insertions(+), 29 deletions(-) > > > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c > > index aff1ab3c32aa..1e4def21811e 100644 > > --- a/fs/notify/fanotify/fanotify.c > > +++ b/fs/notify/fanotify/fanotify.c > > @@ -29,12 +29,6 @@ static unsigned int fanotify_hash_path(const struct path *path) > > hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS); > > } > > > > -static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, > > - __kernel_fsid_t *fsid2) > > -{ > > - return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; > > -} > > - > > static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) > > { > > return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ > > @@ -851,7 +845,8 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) > > if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID)) > > continue; > > fsid = FANOTIFY_MARK(mark)->fsid; > > - if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) > > + if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) && > > + WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) > > continue; > > return fsid; > > } > > @@ -933,12 +928,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, > > return 0; > > } > > > > - if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) { > > + if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) > > fsid = fanotify_get_fsid(iter_info); > > - /* Racing with mark destruction or creation? */ > > - if (!fsid.val[0] && !fsid.val[1]) > > - return 0; > > - } > > > > event = fanotify_alloc_event(group, mask, data, data_type, dir, > > file_name, &fsid, match_mask); > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h > > index 2847fa564298..9c97ae638ac5 100644 > > --- a/fs/notify/fanotify/fanotify.h > > +++ b/fs/notify/fanotify/fanotify.h > > @@ -504,6 +504,12 @@ static inline __kernel_fsid_t *fanotify_mark_fsid(struct fsnotify_mark *mark) > > return &FANOTIFY_MARK(mark)->fsid; > > } > > > > +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, > > + __kernel_fsid_t *fsid2) > > +{ > > + return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; > > +} > > + > > static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) > > { > > unsigned int mflags = 0; > > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > > index e3d836d4d156..1cc68ea56c17 100644 > > --- a/fs/notify/fanotify/fanotify_user.c > > +++ b/fs/notify/fanotify/fanotify_user.c > > @@ -23,7 +23,7 @@ > > > > #include <asm/ioctls.h> > > > > -#include "../../mount.h" > > +#include "../fsnotify.h" > > #include "../fdinfo.h" > > #include "fanotify.h" > > > > @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, > > return recalc; > > } > > > > +struct fan_fsid { > > + struct super_block *sb; > > + __kernel_fsid_t id; > > + bool weak; > > +}; > > + > > +static int fanotify_check_mark_fsid(struct fsnotify_group *group, > > + struct fsnotify_mark *mark, > > + struct fan_fsid *fsid) > > +{ > > + struct fsnotify_mark_connector *conn; > > + struct fsnotify_mark *old; > > + struct super_block *old_sb = NULL; > > + > > + *fanotify_mark_fsid(mark) = fsid->id; > > + mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; > > + if (fsid->weak) > > + mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID; > > + > > + /* First mark added will determine if group is single or multi fsid */ > > + if (list_empty(&group->marks_list)) > > + return 0; > > + > > + /* Find sb of an existing mark */ > > + list_for_each_entry(old, &group->marks_list, g_list) { > > + conn = READ_ONCE(old->connector); > > + if (!conn) > > + continue; > > + old_sb = fsnotify_connector_sb(conn); > > + if (old_sb) > > + break; > > + } > > + > > + /* Only detached marks left? */ > > + if (!old_sb) > > + return 0; > > + > > + /* Do not allow mixing of marks with weak and strong fsid */ > > + if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID) > > + return -EXDEV; > > + > > + /* Allow mixing of marks with strong fsid from different fs */ > > + if (!fsid->weak) > > + return 0; > > + > > + /* Do not allow mixing marks with weak fsid from different fs */ > > + if (old_sb != fsid->sb) > > + return -EXDEV; > > + > > + /* Do not allow mixing marks from different btrfs sub-volumes */ > > + if (!fanotify_fsid_equal(fanotify_mark_fsid(old), > > + fanotify_mark_fsid(mark))) > > + return -EXDEV; > > + > > + return 0; > > +} > > + > > static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > > fsnotify_connp_t *connp, > > unsigned int obj_type, > > unsigned int fan_flags, > > - __kernel_fsid_t *fsid) > > + struct fan_fsid *fsid) > > { > > struct ucounts *ucounts = group->fanotify_data.ucounts; > > struct fanotify_mark *fan_mark; > > @@ -1225,8 +1282,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, > > > > /* Cache fsid of filesystem containing the marked object */ > > if (fsid) { > > - fan_mark->fsid = *fsid; > > - mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; > > + ret = fanotify_check_mark_fsid(group, mark, fsid); > > + if (ret) > > OOPS, missed fsnotify_put_mark(mark); > better add a goto target out_put_mark as this is the second case now. FYI, I pushed the fix to: https://github.com/amir73il/linux/commits/fanotify_fsid Let me know if you want me to post v2 for this. Thanks, Amir. > > > + goto out_dec_ucounts; > > } else { > > fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; > > } > > @@ -1289,7 +1347,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, > > static int fanotify_add_mark(struct fsnotify_group *group, > > fsnotify_connp_t *connp, unsigned int obj_type, > > __u32 mask, unsigned int fan_flags, > > - __kernel_fsid_t *fsid) > > + struct fan_fsid *fsid) > > { > > struct fsnotify_mark *fsn_mark; > > bool recalc; > > @@ -1337,7 +1395,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, > > > > static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, > > struct vfsmount *mnt, __u32 mask, > > - unsigned int flags, __kernel_fsid_t *fsid) > > + unsigned int flags, struct fan_fsid *fsid) > > { > > return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, > > FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); > > @@ -1345,7 +1403,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, > > > > static int fanotify_add_sb_mark(struct fsnotify_group *group, > > struct super_block *sb, __u32 mask, > > - unsigned int flags, __kernel_fsid_t *fsid) > > + unsigned int flags, struct fan_fsid *fsid) > > { > > return fanotify_add_mark(group, &sb->s_fsnotify_marks, > > FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); > > @@ -1353,7 +1411,7 @@ static int fanotify_add_sb_mark(struct fsnotify_group *group, > > > > static int fanotify_add_inode_mark(struct fsnotify_group *group, > > struct inode *inode, __u32 mask, > > - unsigned int flags, __kernel_fsid_t *fsid) > > + unsigned int flags, struct fan_fsid *fsid) > > { > > pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); > > > > @@ -1564,20 +1622,25 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > > return fd; > > } > > > > -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) > > +static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags, > > + struct fan_fsid *fsid) > > { > > + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; > > __kernel_fsid_t root_fsid; > > int err; > > > > /* > > * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). > > */ > > - err = vfs_get_fsid(dentry, fsid); > > + err = vfs_get_fsid(dentry, &fsid->id); > > if (err) > > return err; > > > > - if (!fsid->val[0] && !fsid->val[1]) > > - return -ENODEV; > > + /* Allow weak fsid when marking inodes */ > > + fsid->sb = dentry->d_sb; > > + fsid->weak = mark_type == FAN_MARK_INODE; > > + if (!fsid->id.val[0] && !fsid->id.val[1]) > > + return fsid->weak ? 0 : -ENODEV; > > > > /* > > * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) > > @@ -1587,10 +1650,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) > > if (err) > > return err; > > > > - if (root_fsid.val[0] != fsid->val[0] || > > - root_fsid.val[1] != fsid->val[1]) > > - return -EXDEV; > > + if (!fanotify_fsid_equal(&root_fsid, &fsid->id)) > > + return fsid->weak ? 0 : -EXDEV; > > > > + fsid->weak = false; > > return 0; > > } > > > > @@ -1675,7 +1738,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > > struct fsnotify_group *group; > > struct fd f; > > struct path path; > > - __kernel_fsid_t __fsid, *fsid = NULL; > > + struct fan_fsid __fsid, *fsid = NULL; > > u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; > > unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; > > unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; > > @@ -1827,7 +1890,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, > > } > > > > if (fid_mode) { > > - ret = fanotify_test_fsid(path.dentry, &__fsid); > > + ret = fanotify_test_fsid(path.dentry, flags, &__fsid); > > if (ret) > > goto path_put_and_out; > > > > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > > index a80b525ca653..7f63be5ca0f1 100644 > > --- a/include/linux/fsnotify_backend.h > > +++ b/include/linux/fsnotify_backend.h > > @@ -529,6 +529,7 @@ struct fsnotify_mark { > > #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 > > #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 > > #define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800 > > +#define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000 > > unsigned int flags; /* flags [mark->lock] */ > > }; > > > > -- > > 2.34.1 > >