Re: [RFC PATCH] fs: make s_count atomic_t

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri 27-10-23 11:35:20, Christian Brauner wrote:
> So, I believe we can drop the sb_lock in bdev_super_lock() for all
> holder operations if we turn s_count into an atomic. It will slightly
> change semantics for list walks like iterate_supers() but imho that's
> fine. It'll mean that list walkes need a acquire sb_lock, then try to
> get reference via atomic_inc_not_zero().
> 
> The logic there is simply that if you still found the superblock on the
> list then yes, someone could now concurrently drop s_count to zero
> behind your back. But because you hold sb_lock they can't remove it from
> the list behind your back.
> 
> So if you now fail atomic_inc_not_zero() then you know that someone
> concurrently managed to drop the ref to zero and wants to remove that sb
> from the list. So you just ignore that super block and go on walking the
> list. If however, you manage to get an active reference things are fine
> and you can try to trade that temporary reference for an active
> reference. So my theory at least...
> 
> Yes, ofc we add atomics but for superblocks we shouldn't care especially
> we have less and less list walkers. Both get_super() and
> get_active_super() are gone after all.
> 
> I'm running xfstests as I'm sending this and I need to start finishing
> PRs so in RFC mode. Thoughts?
> 
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>

So in principle I agree we can get rid of some sb_lock use if we convert
sb->s_count to an atomic type. However does this bring any significant
benefit? I would not expect sb_lock to be contended and as you say you need
to be more careful about the races then so that is a reason against such
change.

								Honza

> ---
>  fs/super.c         | 93 ++++++++++++++++++++++++++--------------------
>  include/linux/fs.h |  2 +-
>  2 files changed, 53 insertions(+), 42 deletions(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index 71e5e61cfc9e..c58de6bb5633 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -375,7 +375,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
>  	INIT_LIST_HEAD(&s->s_inodes_wb);
>  	spin_lock_init(&s->s_inode_wblist_lock);
>  
> -	s->s_count = 1;
> +	atomic_set(&s->s_count, 1);
>  	atomic_set(&s->s_active, 1);
>  	mutex_init(&s->s_vfs_rename_mutex);
>  	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
> @@ -409,19 +409,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
>  /*
>   * Drop a superblock's refcount.  The caller must hold sb_lock.
>   */
> -static void __put_super(struct super_block *s)
> -{
> -	if (!--s->s_count) {
> -		list_del_init(&s->s_list);
> -		WARN_ON(s->s_dentry_lru.node);
> -		WARN_ON(s->s_inode_lru.node);
> -		WARN_ON(!list_empty(&s->s_mounts));
> -		security_sb_free(s);
> -		put_user_ns(s->s_user_ns);
> -		kfree(s->s_subtype);
> -		call_rcu(&s->rcu, destroy_super_rcu);
> -	}
> -}
>  
>  /**
>   *	put_super	-	drop a temporary reference to superblock
> @@ -430,10 +417,20 @@ static void __put_super(struct super_block *s)
>   *	Drops a temporary reference, frees superblock if there's no
>   *	references left.
>   */
> -void put_super(struct super_block *sb)
> +void put_super(struct super_block *s)
>  {
> +	if (!atomic_dec_and_test(&s->s_count))
> +		return;
> +
>  	spin_lock(&sb_lock);
> -	__put_super(sb);
> +	list_del_init(&s->s_list);
> +	WARN_ON(s->s_dentry_lru.node);
> +	WARN_ON(s->s_inode_lru.node);
> +	WARN_ON(!list_empty(&s->s_mounts));
> +	security_sb_free(s);
> +	put_user_ns(s->s_user_ns);
> +	kfree(s->s_subtype);
> +	call_rcu(&s->rcu, destroy_super_rcu);
>  	spin_unlock(&sb_lock);
>  }
>  
> @@ -548,8 +545,11 @@ static bool grab_super(struct super_block *sb)
>  {
>  	bool locked;
>  
> -	sb->s_count++;
> +	locked = atomic_inc_not_zero(&sb->s_count);
>  	spin_unlock(&sb_lock);
> +	if (!locked)
> +		return false;
> +
>  	locked = super_lock_excl(sb);
>  	if (locked) {
>  		if (atomic_inc_not_zero(&sb->s_active)) {
> @@ -908,19 +908,20 @@ static void __iterate_supers(void (*f)(struct super_block *))
>  		/* Pairs with memory marrier in super_wake(). */
>  		if (smp_load_acquire(&sb->s_flags) & SB_DYING)
>  			continue;
> -		sb->s_count++;
> +		if (!atomic_inc_not_zero(&sb->s_count))
> +			continue;
>  		spin_unlock(&sb_lock);
>  
>  		f(sb);
>  
> -		spin_lock(&sb_lock);
>  		if (p)
> -			__put_super(p);
> +			put_super(p);
>  		p = sb;
> +		spin_lock(&sb_lock);
>  	}
> -	if (p)
> -		__put_super(p);
>  	spin_unlock(&sb_lock);
> +	if (p)
> +		put_super(p);
>  }
>  /**
>   *	iterate_supers - call function for all active superblocks
> @@ -938,7 +939,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
>  	list_for_each_entry(sb, &super_blocks, s_list) {
>  		bool locked;
>  
> -		sb->s_count++;
> +		if (!atomic_inc_not_zero(&sb->s_count))
> +			continue;
>  		spin_unlock(&sb_lock);
>  
>  		locked = super_lock_shared(sb);
> @@ -948,14 +950,14 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
>  			super_unlock_shared(sb);
>  		}
>  
> -		spin_lock(&sb_lock);
>  		if (p)
> -			__put_super(p);
> +			put_super(p);
>  		p = sb;
> +		spin_lock(&sb_lock);
>  	}
> -	if (p)
> -		__put_super(p);
>  	spin_unlock(&sb_lock);
> +	if (p)
> +		put_super(p);
>  }
>  
>  /**
> @@ -976,7 +978,8 @@ void iterate_supers_type(struct file_system_type *type,
>  	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
>  		bool locked;
>  
> -		sb->s_count++;
> +		if (!atomic_inc_not_zero(&sb->s_count))
> +			continue;
>  		spin_unlock(&sb_lock);
>  
>  		locked = super_lock_shared(sb);
> @@ -986,14 +989,14 @@ void iterate_supers_type(struct file_system_type *type,
>  			super_unlock_shared(sb);
>  		}
>  
> -		spin_lock(&sb_lock);
>  		if (p)
> -			__put_super(p);
> +			put_super(p);
>  		p = sb;
> +		spin_lock(&sb_lock);
>  	}
> -	if (p)
> -		__put_super(p);
>  	spin_unlock(&sb_lock);
> +	if (p)
> +		put_super(p);
>  }
>  
>  EXPORT_SYMBOL(iterate_supers_type);
> @@ -1007,7 +1010,8 @@ struct super_block *user_get_super(dev_t dev, bool excl)
>  		if (sb->s_dev ==  dev) {
>  			bool locked;
>  
> -			sb->s_count++;
> +			if (!atomic_inc_not_zero(&sb->s_count))
> +				continue;
>  			spin_unlock(&sb_lock);
>  			/* still alive? */
>  			locked = super_lock(sb, excl);
> @@ -1017,8 +1021,8 @@ struct super_block *user_get_super(dev_t dev, bool excl)
>  				super_unlock(sb, excl);
>  			}
>  			/* nope, got unmounted */
> +			put_super(sb);
>  			spin_lock(&sb_lock);
> -			__put_super(sb);
>  			break;
>  		}
>  	}
> @@ -1387,20 +1391,27 @@ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
>  	__releases(&bdev->bd_holder_lock)
>  {
>  	struct super_block *sb = bdev->bd_holder;
> -	bool locked;
> +	bool active;
>  
>  	lockdep_assert_held(&bdev->bd_holder_lock);
>  	lockdep_assert_not_held(&sb->s_umount);
>  	lockdep_assert_not_held(&bdev->bd_disk->open_mutex);
>  
> -	/* Make sure sb doesn't go away from under us */
> -	spin_lock(&sb_lock);
> -	sb->s_count++;
> -	spin_unlock(&sb_lock);
> +	active = atomic_inc_not_zero(&sb->s_count);
>  
>  	mutex_unlock(&bdev->bd_holder_lock);
>  
> -	locked = super_lock(sb, excl);
> +	/*
> +	 * The bd_holder_lock guarantees that @sb is still valid.
> +	 * sb->s_count can't be zero. If it were it would mean that we
> +	 * found a block device that has bdev->bd_holder set to a
> +	 * superblock that's about to be freed. IOW, there's a UAF
> +	 * somewhere...
> +	 */
> +	if (WARN_ON_ONCE(!active))
> +		return NULL;
> +
> +	active = super_lock(sb, excl);
>  
>  	/*
>  	 * If the superblock wasn't already SB_DYING then we hold
> @@ -1408,7 +1419,7 @@ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
>           */
>  	put_super(sb);
>  
> -	if (!locked)
> +	if (!active)
>  		return NULL;
>  
>  	if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 5174e821d451..68e453c155af 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1201,7 +1201,7 @@ struct super_block {
>  	unsigned long		s_magic;
>  	struct dentry		*s_root;
>  	struct rw_semaphore	s_umount;
> -	int			s_count;
> +	atomic_t		s_count;
>  	atomic_t		s_active;
>  #ifdef CONFIG_SECURITY
>  	void                    *s_security;
> -- 
> 2.34.1
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux