On Fri 27-10-23 11:35:20, Christian Brauner wrote: > So, I believe we can drop the sb_lock in bdev_super_lock() for all > holder operations if we turn s_count into an atomic. It will slightly > change semantics for list walks like iterate_supers() but imho that's > fine. It'll mean that list walkes need a acquire sb_lock, then try to > get reference via atomic_inc_not_zero(). > > The logic there is simply that if you still found the superblock on the > list then yes, someone could now concurrently drop s_count to zero > behind your back. But because you hold sb_lock they can't remove it from > the list behind your back. > > So if you now fail atomic_inc_not_zero() then you know that someone > concurrently managed to drop the ref to zero and wants to remove that sb > from the list. So you just ignore that super block and go on walking the > list. If however, you manage to get an active reference things are fine > and you can try to trade that temporary reference for an active > reference. So my theory at least... > > Yes, ofc we add atomics but for superblocks we shouldn't care especially > we have less and less list walkers. Both get_super() and > get_active_super() are gone after all. > > I'm running xfstests as I'm sending this and I need to start finishing > PRs so in RFC mode. Thoughts? > > Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> So in principle I agree we can get rid of some sb_lock use if we convert sb->s_count to an atomic type. However does this bring any significant benefit? I would not expect sb_lock to be contended and as you say you need to be more careful about the races then so that is a reason against such change. Honza > --- > fs/super.c | 93 ++++++++++++++++++++++++++-------------------- > include/linux/fs.h | 2 +- > 2 files changed, 53 insertions(+), 42 deletions(-) > > diff --git a/fs/super.c b/fs/super.c > index 71e5e61cfc9e..c58de6bb5633 100644 > --- a/fs/super.c > +++ b/fs/super.c > @@ -375,7 +375,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, > INIT_LIST_HEAD(&s->s_inodes_wb); > spin_lock_init(&s->s_inode_wblist_lock); > > - s->s_count = 1; > + atomic_set(&s->s_count, 1); > atomic_set(&s->s_active, 1); > mutex_init(&s->s_vfs_rename_mutex); > lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); > @@ -409,19 +409,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, > /* > * Drop a superblock's refcount. The caller must hold sb_lock. > */ > -static void __put_super(struct super_block *s) > -{ > - if (!--s->s_count) { > - list_del_init(&s->s_list); > - WARN_ON(s->s_dentry_lru.node); > - WARN_ON(s->s_inode_lru.node); > - WARN_ON(!list_empty(&s->s_mounts)); > - security_sb_free(s); > - put_user_ns(s->s_user_ns); > - kfree(s->s_subtype); > - call_rcu(&s->rcu, destroy_super_rcu); > - } > -} > > /** > * put_super - drop a temporary reference to superblock > @@ -430,10 +417,20 @@ static void __put_super(struct super_block *s) > * Drops a temporary reference, frees superblock if there's no > * references left. > */ > -void put_super(struct super_block *sb) > +void put_super(struct super_block *s) > { > + if (!atomic_dec_and_test(&s->s_count)) > + return; > + > spin_lock(&sb_lock); > - __put_super(sb); > + list_del_init(&s->s_list); > + WARN_ON(s->s_dentry_lru.node); > + WARN_ON(s->s_inode_lru.node); > + WARN_ON(!list_empty(&s->s_mounts)); > + security_sb_free(s); > + put_user_ns(s->s_user_ns); > + kfree(s->s_subtype); > + call_rcu(&s->rcu, destroy_super_rcu); > spin_unlock(&sb_lock); > } > > @@ -548,8 +545,11 @@ static bool grab_super(struct super_block *sb) > { > bool locked; > > - sb->s_count++; > + locked = atomic_inc_not_zero(&sb->s_count); > spin_unlock(&sb_lock); > + if (!locked) > + return false; > + > locked = super_lock_excl(sb); > if (locked) { > if (atomic_inc_not_zero(&sb->s_active)) { > @@ -908,19 +908,20 @@ static void __iterate_supers(void (*f)(struct super_block *)) > /* Pairs with memory marrier in super_wake(). */ > if (smp_load_acquire(&sb->s_flags) & SB_DYING) > continue; > - sb->s_count++; > + if (!atomic_inc_not_zero(&sb->s_count)) > + continue; > spin_unlock(&sb_lock); > > f(sb); > > - spin_lock(&sb_lock); > if (p) > - __put_super(p); > + put_super(p); > p = sb; > + spin_lock(&sb_lock); > } > - if (p) > - __put_super(p); > spin_unlock(&sb_lock); > + if (p) > + put_super(p); > } > /** > * iterate_supers - call function for all active superblocks > @@ -938,7 +939,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) > list_for_each_entry(sb, &super_blocks, s_list) { > bool locked; > > - sb->s_count++; > + if (!atomic_inc_not_zero(&sb->s_count)) > + continue; > spin_unlock(&sb_lock); > > locked = super_lock_shared(sb); > @@ -948,14 +950,14 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) > super_unlock_shared(sb); > } > > - spin_lock(&sb_lock); > if (p) > - __put_super(p); > + put_super(p); > p = sb; > + spin_lock(&sb_lock); > } > - if (p) > - __put_super(p); > spin_unlock(&sb_lock); > + if (p) > + put_super(p); > } > > /** > @@ -976,7 +978,8 @@ void iterate_supers_type(struct file_system_type *type, > hlist_for_each_entry(sb, &type->fs_supers, s_instances) { > bool locked; > > - sb->s_count++; > + if (!atomic_inc_not_zero(&sb->s_count)) > + continue; > spin_unlock(&sb_lock); > > locked = super_lock_shared(sb); > @@ -986,14 +989,14 @@ void iterate_supers_type(struct file_system_type *type, > super_unlock_shared(sb); > } > > - spin_lock(&sb_lock); > if (p) > - __put_super(p); > + put_super(p); > p = sb; > + spin_lock(&sb_lock); > } > - if (p) > - __put_super(p); > spin_unlock(&sb_lock); > + if (p) > + put_super(p); > } > > EXPORT_SYMBOL(iterate_supers_type); > @@ -1007,7 +1010,8 @@ struct super_block *user_get_super(dev_t dev, bool excl) > if (sb->s_dev == dev) { > bool locked; > > - sb->s_count++; > + if (!atomic_inc_not_zero(&sb->s_count)) > + continue; > spin_unlock(&sb_lock); > /* still alive? */ > locked = super_lock(sb, excl); > @@ -1017,8 +1021,8 @@ struct super_block *user_get_super(dev_t dev, bool excl) > super_unlock(sb, excl); > } > /* nope, got unmounted */ > + put_super(sb); > spin_lock(&sb_lock); > - __put_super(sb); > break; > } > } > @@ -1387,20 +1391,27 @@ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl) > __releases(&bdev->bd_holder_lock) > { > struct super_block *sb = bdev->bd_holder; > - bool locked; > + bool active; > > lockdep_assert_held(&bdev->bd_holder_lock); > lockdep_assert_not_held(&sb->s_umount); > lockdep_assert_not_held(&bdev->bd_disk->open_mutex); > > - /* Make sure sb doesn't go away from under us */ > - spin_lock(&sb_lock); > - sb->s_count++; > - spin_unlock(&sb_lock); > + active = atomic_inc_not_zero(&sb->s_count); > > mutex_unlock(&bdev->bd_holder_lock); > > - locked = super_lock(sb, excl); > + /* > + * The bd_holder_lock guarantees that @sb is still valid. > + * sb->s_count can't be zero. If it were it would mean that we > + * found a block device that has bdev->bd_holder set to a > + * superblock that's about to be freed. IOW, there's a UAF > + * somewhere... > + */ > + if (WARN_ON_ONCE(!active)) > + return NULL; > + > + active = super_lock(sb, excl); > > /* > * If the superblock wasn't already SB_DYING then we hold > @@ -1408,7 +1419,7 @@ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl) > */ > put_super(sb); > > - if (!locked) > + if (!active) > return NULL; > > if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) { > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 5174e821d451..68e453c155af 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1201,7 +1201,7 @@ struct super_block { > unsigned long s_magic; > struct dentry *s_root; > struct rw_semaphore s_umount; > - int s_count; > + atomic_t s_count; > atomic_t s_active; > #ifdef CONFIG_SECURITY > void *s_security; > -- > 2.34.1 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR