On Thu, Oct 03, 2024 at 02:26:57PM +0200, Jan Kara wrote: > On Thu 03-10-24 05:11:11, Christoph Hellwig wrote: > > On Thu, Oct 03, 2024 at 01:57:21PM +0200, Jan Kara wrote: > > > Fair enough. If we go with the iterator variant I've suggested to Dave in > > > [1], we could combine the evict_inodes(), fsnotify_unmount_inodes() and > > > Landlocks hook_sb_delete() into a single iteration relatively easily. But > > > I'd wait with that convertion until this series lands. > > > > I don't see how that has anything to do with iterators or not. > > Well, the patches would obviously conflict Conflict with what? > which seems pointless if we > could live with three iterations for a few years until somebody noticed :). > And with current Dave's version of iterators it will not be possible to > integrate evict_inodes() iteration with the other two without a layering > violation. Still we could go from 3 to 2 iterations. What layering violation? Below is quick compile tested part to do the fsnotify side and get rid of the fsnotify iteration, which looks easily worth it. landlock is a bit more complex because of lsm hooks, and the internal underobj abstraction inside of landlock. But looking at the release inode data vs unomunt synchronization it has and the duplicate code I think doing it this way is worth the effort even more, it'll just need someone who knows landlock and the lsm layering to help with the work. diff --git a/fs/inode.c b/fs/inode.c index 3f335f78c5b228..7d5f8a09e4d29d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -789,11 +789,23 @@ static bool dispose_list(struct list_head *head) */ static int evict_inode_fn(struct inode *inode, void *data) { + struct super_block *sb = inode->i_sb; struct list_head *dispose = data; + bool post_unmount = !(sb->s_flags & SB_ACTIVE); spin_lock(&inode->i_lock); - if (atomic_read(&inode->i_count) || - (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))) { + if (atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + + /* for each watch, send FS_UNMOUNT and then remove it */ + if (post_unmount && fsnotify_sb_info(sb)) { + fsnotify_inode(inode, FS_UNMOUNT); + fsnotify_inode_delete(inode); + } + return INO_ITER_DONE; + } + + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); return INO_ITER_DONE; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 68c34ed9427190..cf89aa69e82c8d 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -28,16 +28,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } -static int fsnotify_unmount_inode_fn(struct inode *inode, void *data) -{ - spin_unlock(&inode->i_lock); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify_inode(inode, FS_UNMOUNT); - fsnotify_inode_delete(inode); - return INO_ITER_DONE; -} - void fsnotify_sb_delete(struct super_block *sb) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); @@ -46,19 +36,6 @@ void fsnotify_sb_delete(struct super_block *sb) if (!sbinfo) return; - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with SB_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - * However, we should have been called /after/ evict_inodes - * removed all zero refcount inodes, in any case. Hence we use - * INO_ITER_REFERENCED to ensure zero refcount inodes are filtered - * properly. - */ - super_iter_inodes(sb, fsnotify_unmount_inode_fn, NULL, - INO_ITER_REFERENCED); - fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ wait_var_event(fsnotify_sb_watched_objects(sb), diff --git a/fs/super.c b/fs/super.c index 971ad4e996e0ba..88dd1703fe73db 100644 --- a/fs/super.c +++ b/fs/super.c @@ -167,28 +167,17 @@ static void super_wake(struct super_block *sb, unsigned int flag) wake_up_var(&sb->s_flags); } -bool super_iter_iget(struct inode *inode, int flags) +bool super_iter_iget(struct inode *inode) { - bool ret = false; + bool ret = false; spin_lock(&inode->i_lock); - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) - goto out_unlock; - - /* - * Skip over zero refcount inode if the caller only wants - * referenced inodes to be iterated. - */ - if ((flags & INO_ITER_REFERENCED) && - !atomic_read(&inode->i_count)) - goto out_unlock; - - __iget(inode); - ret = true; -out_unlock: + if (!(inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))) { + __iget(inode); + ret = true; + } spin_unlock(&inode->i_lock); return ret; - } EXPORT_SYMBOL_GPL(super_iter_iget); @@ -216,7 +205,7 @@ int super_iter_inodes(struct super_block *sb, ino_iter_fn iter_fn, spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (!super_iter_iget(inode, flags)) + if (!super_iter_iget(inode)) continue; spin_unlock(&sb->s_inode_list_lock); iput(old_inode); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index ee544556cee728..5a174e690424fb 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1654,8 +1654,7 @@ xfs_iter_vfs_igrab( if (ip->i_flags & XFS_ITER_VFS_NOGRAB_IFLAGS) goto out_unlock_noent; - if ((flags & INO_ITER_UNSAFE) || - super_iter_iget(inode, flags)) + if ((flags & INO_ITER_UNSAFE) || super_iter_iget(inode)) ret = true; out_unlock_noent: diff --git a/include/linux/fs.h b/include/linux/fs.h index 2aa335228b84bf..a3c682f0d94c1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2224,7 +2224,7 @@ enum freeze_holder { typedef int (*ino_iter_fn)(struct inode *inode, void *priv); int super_iter_inodes(struct super_block *sb, ino_iter_fn iter_fn, void *private_data, int flags); -bool super_iter_iget(struct inode *inode, int flags); +bool super_iter_iget(struct inode *inode); struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb);