Re: [PATCH 2/7] fsfreeze: emergency thaw will deadlock on s_umount

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 10-07-12 17:23:35, Fernando Luis Vázquez Cao wrote:
> The emergency thaw process uses iterate_super() which holds the
> sb->s_umount lock in read mode. The current thaw_super() code takes
> the sb->s_umount lock in write mode, hence leading to an instant
> deadlock.
> 
> Pass the emergency state into the thaw_bdev/thaw_super code to avoid
> taking the s_umount lock in this case. We are running under the bdev
> freeze mutex, so this is still serialised against freeze despite
> only having a read lock on the sb->s_umount. Hence it should be safe
> to execute in this manner, especially given that emergency thaw is a
> rarely executed "get-out-of-jail" feature.
  Umm, I find it ugly to have conditional locking in thaw_super(). IMHO
it would be cleaner to provide a __thaw_super() function that would expect
all the locking to be done already.

Also looking into the code, it seems emergency thaw won't be able to thaw
filesystems frozen with FIFREEZE ioctl (bd_fsfreeze_count will be zero)?
Calling thaw_super() directly would solve this but then we'd leave
bd_fzfreeze_count inconsistent... It's a mess with these two types of
freezing.

Finally this patch collides with my patches fixing deadlocks in filesystem
freezing code (http://www.spinics.net/lists/kernel/msg1355763.html), which
are waiting for Al to merge them. But it should be relatively easy to
resolve the conflict.

								Honza

> Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Signed-off-by: Fernando Luis Vazquez Cao <fernando@xxxxxxxxxxxxx>
> ---
> 
> diff -urNp linux-3.5-rc4-orig/fs/block_dev.c linux-3.5-rc4/fs/block_dev.c
> --- linux-3.5-rc4-orig/fs/block_dev.c	2012-06-29 14:03:40.732008807 +0900
> +++ linux-3.5-rc4/fs/block_dev.c	2012-06-29 14:20:29.480002824 +0900
> @@ -305,13 +305,14 @@ struct super_block *freeze_bdev(struct b
>  EXPORT_SYMBOL(freeze_bdev);
>  
>  /**
> - * thaw_bdev  -- unlock filesystem
> + * __thaw_bdev  -- unlock filesystem
>   * @bdev:	blockdevice to unlock
>   * @sb:		associated superblock
> + * @emergency:	emergency thaw
>   *
>   * Unlocks the filesystem and marks it writeable again after freeze_bdev().
>   */
> -int thaw_bdev(struct block_device *bdev, struct super_block *sb)
> +static int __thaw_bdev(struct block_device *bdev, struct super_block *sb, int emergency)
>  {
>  	int error = -EINVAL;
>  
> @@ -327,15 +328,35 @@ int thaw_bdev(struct block_device *bdev,
>  	if (!sb)
>  		goto out;
>  
> -	error = thaw_super(sb);
> +	if (emergency)
> +		error = thaw_super_emergency(sb);
> +	else
> +		error = thaw_super(sb);
>  	if (error)
>  		bdev->bd_fsfreeze_count++;
>  out:
>  	mutex_unlock(&bdev->bd_fsfreeze_mutex);
>  	return error;
>  }
> +
> +/**
> + * thaw_bdev  -- unlock filesystem
> + * @bdev:	blockdevice to unlock
> + * @sb:		associated superblock
> + *
> + * Unlocks the filesystem and marks it writeable again after freeze_bdev().
> + */
> +int thaw_bdev(struct block_device *bdev, struct super_block *sb)
> +{
> +	return __thaw_bdev(bdev, sb, 0);
> +}
>  EXPORT_SYMBOL(thaw_bdev);
>  
> +int thaw_bdev_emergency(struct block_device *bdev, struct super_block *sb)
> +{
> +	return __thaw_bdev(bdev, sb, 1);
> +}
> +
>  static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	return block_write_full_page(page, blkdev_get_block, wbc);
> diff -urNp linux-3.5-rc4-orig/fs/buffer.c linux-3.5-rc4/fs/buffer.c
> --- linux-3.5-rc4-orig/fs/buffer.c	2012-06-27 12:00:47.762616000 +0900
> +++ linux-3.5-rc4/fs/buffer.c	2012-06-29 14:21:41.948002825 +0900
> @@ -514,7 +514,7 @@ repeat:
>  static void do_thaw_one(struct super_block *sb, void *unused)
>  {
>  	char b[BDEVNAME_SIZE];
> -	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
> +	while (sb->s_bdev && !thaw_bdev_emergency(sb->s_bdev, sb))
>  		printk(KERN_WARNING "Emergency Thaw on %s\n",
>  		       bdevname(sb->s_bdev, b));
>  }
> diff -urNp linux-3.5-rc4-orig/fs/super.c linux-3.5-rc4/fs/super.c
> --- linux-3.5-rc4-orig/fs/super.c	2012-05-21 07:29:13.000000000 +0900
> +++ linux-3.5-rc4/fs/super.c	2012-06-29 14:49:57.480003483 +0900
> @@ -1221,19 +1221,24 @@ int freeze_super(struct super_block *sb)
>  EXPORT_SYMBOL(freeze_super);
>  
>  /**
> - * thaw_super -- unlock filesystem
> + * __thaw_super -- unlock filesystem
>   * @sb: the super to thaw
> + * @emergency:	emergency thaw
>   *
>   * Unlocks the filesystem and marks it writeable again after freeze_super().
> + * If we are doing an emergency thaw, we don't need to grab the sb->s_umount
> + * lock as it is already held.
>   */
> -int thaw_super(struct super_block *sb)
> +static int __thaw_super(struct super_block *sb, int emergency)
>  {
> -	int error;
> +	int error = 0;
> +
> +	if (!emergency)
> +		down_write(&sb->s_umount);
>  
> -	down_write(&sb->s_umount);
>  	if (sb->s_frozen == SB_UNFROZEN) {
> -		up_write(&sb->s_umount);
> -		return -EINVAL;
> +		error = -EINVAL;
> +		goto out_unlock;
>  	}
>  
>  	if (sb->s_flags & MS_RDONLY)
> @@ -1245,8 +1250,7 @@ int thaw_super(struct super_block *sb)
>  			printk(KERN_ERR
>  				"VFS:Filesystem thaw failed\n");
>  			sb->s_frozen = SB_FREEZE_TRANS;
> -			up_write(&sb->s_umount);
> -			return error;
> +			goto out_unlock;
>  		}
>  	}
>  
> @@ -1254,8 +1258,44 @@ out:
>  	sb->s_frozen = SB_UNFROZEN;
>  	smp_wmb();
>  	wake_up(&sb->s_wait_unfrozen);
> -	deactivate_locked_super(sb);
> +
> +	/*
> +	 * When called from emergency scope, we cannot grab the s_umount lock
> +	 * so we cannot deactivate the superblock. This may leave unbalanced
> +	 * superblock references which could prevent unmount, but given this is
> +	 * an emergency operation....
> +	 */
> +	if (!emergency)
> +		deactivate_locked_super(sb);
>  
>  	return 0;
> +
> +out_unlock:
> +	if (!emergency)
> +		up_write(&sb->s_umount);
> +	return error;
> +}
> +
> +/**
> + * thaw_super -- unlock filesystem
> + * @sb: the super to thaw
> + *
> + * Unlocks the filesystem and marks it writeable again after freeze_super().
> + */
> +int thaw_super(struct super_block *sb)
> +{
> +	return __thaw_super(sb, 0);
>  }
>  EXPORT_SYMBOL(thaw_super);
> +
> +/**
> + * thaw_super_emergency  -- unlock filesystem
> + * @sb: the super to thaw
> + *
> + * Unlocks the filesystem and marks it writeable again after freeze_super().
> + * This avoids taking the s_umount lock if it is already held.
> + */
> +int thaw_super_emergency(struct super_block *sb)
> +{
> +	return __thaw_super(sb, 1);
> +}
> diff -urNp linux-3.5-rc4-orig/include/linux/fs.h linux-3.5-rc4/include/linux/fs.h
> --- linux-3.5-rc4-orig/include/linux/fs.h	2012-06-27 12:00:48.894616001 +0900
> +++ linux-3.5-rc4/include/linux/fs.h	2012-06-29 14:43:51.096010758 +0900
> @@ -1941,6 +1941,7 @@ extern int fd_statfs(int, struct kstatfs
>  extern int vfs_ustat(dev_t, struct kstatfs *);
>  extern int freeze_super(struct super_block *super);
>  extern int thaw_super(struct super_block *super);
> +extern int thaw_super_emergency(struct super_block *super);
>  extern bool our_mnt(struct vfsmount *mnt);
>  
>  extern int current_umask(void);
> @@ -2096,6 +2097,8 @@ extern void kill_bdev(struct block_devic
>  extern struct super_block *freeze_bdev(struct block_device *);
>  extern void emergency_thaw_all(void);
>  extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
> +extern int thaw_bdev_emergency(struct block_device *bdev,
> +			       struct super_block *sb);
>  extern int fsync_bdev(struct block_device *);
>  #else
>  static inline void bd_forget(struct inode *inode) {}
> @@ -2112,6 +2115,12 @@ static inline int thaw_bdev(struct block
>  {
>  	return 0;
>  }
> +
> +static inline int thaw_bdev_emergency(struct block_device *bdev,
> +				      struct super_block *sb)
> +{
> +	return 0;
> +}
>  #endif
>  extern int sync_filesystem(struct super_block *);
>  extern const struct file_operations def_blk_fops;
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-- 
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux