On Wed, Mar 20, 2024 at 10:51:34AM +0100, Jan Kara wrote: > On Wed 20-03-24 11:37:57, Amir Goldstein wrote: > > On Wed, Mar 20, 2024 at 10:47 AM Christian Brauner <brauner@xxxxxxxxxx> wrote: > > > > > > On Sun, Mar 17, 2024 at 08:41:51PM +0200, Amir Goldstein wrote: > > > > Define a container struct fsnotify_sb_info to hold per-sb state, > > > > including the reference to sb marks connector. > > > > > > > > Allocate the fsnotify_sb_info state before attaching connector to any > > > > object on the sb and free it only when killing sb. > > > > > > > > This state is going to be used for storing per priority watched objects > > > > counters. > > > > > > > > Suggested-by: Jan Kara <jack@xxxxxxx> > > > > Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > > > > --- > > > > fs/notify/fsnotify.c | 16 +++++++++++++--- > > > > fs/notify/fsnotify.h | 9 ++++++++- > > > > fs/notify/mark.c | 32 +++++++++++++++++++++++++++++++- > > > > include/linux/fs.h | 8 ++++---- > > > > include/linux/fsnotify_backend.h | 17 +++++++++++++++++ > > > > 5 files changed, 73 insertions(+), 9 deletions(-) > > > > > > > > diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c > > > > index 503e7c75e777..fb3f36bc6ea9 100644 > > > > --- a/fs/notify/fsnotify.c > > > > +++ b/fs/notify/fsnotify.c > > > > @@ -89,11 +89,18 @@ static void fsnotify_unmount_inodes(struct super_block *sb) > > > > > > > > void fsnotify_sb_delete(struct super_block *sb) > > > > { > > > > + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); > > > > + > > > > + /* Were any marks ever added to any object on this sb? 
*/ > > > > + if (!sbinfo) > > > > + return; > > > > + > > > > fsnotify_unmount_inodes(sb); > > > > fsnotify_clear_marks_by_sb(sb); > > > > /* Wait for outstanding object references from connectors */ > > > > wait_var_event(fsnotify_sb_watched_objects(sb), > > > > !atomic_long_read(fsnotify_sb_watched_objects(sb))); > > > > + kfree(sbinfo); > > > > } > > > > > > > > /* > > > > @@ -489,6 +496,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, > > > > { > > > > const struct path *path = fsnotify_data_path(data, data_type); > > > > struct super_block *sb = fsnotify_data_sb(data, data_type); > > > > + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); > > > > struct fsnotify_iter_info iter_info = {}; > > > > struct mount *mnt = NULL; > > > > struct inode *inode2 = NULL; > > > > @@ -525,7 +533,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, > > > > * SRCU because we have no references to any objects and do not > > > > * need SRCU to keep them "alive". 
> > > > */ > > > > - if (!sb->s_fsnotify_marks && > > > > + if ((!sbinfo || !sbinfo->sb_marks) && > > > > (!mnt || !mnt->mnt_fsnotify_marks) && > > > > (!inode || !inode->i_fsnotify_marks) && > > > > (!inode2 || !inode2->i_fsnotify_marks)) > > > > @@ -552,8 +560,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, > > > > > > > > iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); > > > > > > > > - iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = > > > > - fsnotify_first_mark(&sb->s_fsnotify_marks); > > > > + if (sbinfo) { > > > > + iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = > > > > + fsnotify_first_mark(&sbinfo->sb_marks); > > > > + } > > > > if (mnt) { > > > > iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = > > > > fsnotify_first_mark(&mnt->mnt_fsnotify_marks); > > > > diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h > > > > index 8b73ad45cc71..378f9ec6d64b 100644 > > > > --- a/fs/notify/fsnotify.h > > > > +++ b/fs/notify/fsnotify.h > > > > @@ -53,6 +53,13 @@ static inline struct super_block *fsnotify_connector_sb( > > > > return fsnotify_object_sb(conn->obj, conn->type); > > > > } > > > > > > > > +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb) > > > > +{ > > > > + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); > > > > + > > > > + return sbinfo ? 
&sbinfo->sb_marks : NULL; > > > > +} > > > > + > > > > /* destroy all events sitting in this groups notification queue */ > > > > extern void fsnotify_flush_notify(struct fsnotify_group *group); > > > > > > > > @@ -78,7 +85,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) > > > > /* run the list of all marks associated with sb and destroy them */ > > > > static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) > > > > { > > > > - fsnotify_destroy_marks(&sb->s_fsnotify_marks); > > > > + fsnotify_destroy_marks(fsnotify_sb_marks(sb)); > > > > } > > > > > > > > /* > > > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c > > > > index 0b703f9e6344..db053e0e218d 100644 > > > > --- a/fs/notify/mark.c > > > > +++ b/fs/notify/mark.c > > > > @@ -105,7 +105,7 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, int obj_type) > > > > case FSNOTIFY_OBJ_TYPE_VFSMOUNT: > > > > return &real_mount(obj)->mnt_fsnotify_marks; > > > > case FSNOTIFY_OBJ_TYPE_SB: > > > > - return &((struct super_block *)obj)->s_fsnotify_marks; > > > > + return fsnotify_sb_marks(obj); > > > > default: > > > > return NULL; > > > > } > > > > @@ -568,6 +568,26 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) > > > > return -1; > > > > } > > > > > > > > +static int fsnotify_attach_info_to_sb(struct super_block *sb) > > > > +{ > > > > + struct fsnotify_sb_info *sbinfo; > > > > + > > > > + /* sb info is freed on fsnotify_sb_delete() */ > > > > + sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL); > > > > + if (!sbinfo) > > > > + return -ENOMEM; > > > > + > > > > + /* > > > > + * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() > > > > + * will observe an initialized structure > > > > + */ > > > > + if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) { > > > > + /* Someone else created sbinfo for us */ > > > > + kfree(sbinfo); > > > > + } > > > > > > Alternatively, you could consider using wait_var_event() to let > > > 
concurrent attachers wait for s_fsnotify_info to be initialized using a
> > > sentinel value to indicate that the caller should wait. But not sure if
> > > it's worth it.
> >
> > Not worth it IMO. Adding watches is an extremely rare event
> > in the grand picture.
>
> Agreed. The cmpxchg() scheme has generally proven to be good enough in
> similar situations and simple enough to understand...

Thanks, sounds good to me.