Re: [PATCH v5] writeback: Fix inode->i_io_list not be protected by inode->i_lock error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 24-05-22 08:05:40, Jchao Sun wrote:
> Commit b35250c0816c ("writeback: Protect inode->i_io_list with
> inode->i_lock") made inode->i_io_list not only protected by
> wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
> was missed. Add lock there and also update comment describing
> things protected by inode->i_lock. This also fixes a race where
> __mark_inode_dirty() could move inode under flush worker's hands
> and thus sync(2) could miss writing some inodes.
> 
> Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock")
> Signed-off-by: Jchao Sun <sunjunchao2870@xxxxxxxxx>

Thanks for the fix! It looks good to me now (modulo some too long comment
lines but I can wrap those on commit). I'll take the patch to my tree once
I push out stuff I have ready for the merge window.

								Honza

> ---
>  fs/fs-writeback.c | 37 ++++++++++++++++++++++++++++---------
>  fs/inode.c        |  2 +-
>  2 files changed, 29 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 591fe9cf1659..99793bb838e5 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode,
>  				      struct list_head *head)
>  {
>  	assert_spin_locked(&wb->list_lock);
> +	assert_spin_locked(&inode->i_lock);
>  
>  	list_move(&inode->i_io_list, head);
>  
> @@ -1365,9 +1366,9 @@ static int move_expired_inodes(struct list_head *delaying_queue,
>  		inode = wb_inode(delaying_queue->prev);
>  		if (inode_dirtied_after(inode, dirtied_before))
>  			break;
> +		spin_lock(&inode->i_lock);
>  		list_move(&inode->i_io_list, &tmp);
>  		moved++;
> -		spin_lock(&inode->i_lock);
>  		inode->i_state |= I_SYNC_QUEUED;
>  		spin_unlock(&inode->i_lock);
>  		if (sb_is_blkdev_sb(inode->i_sb))
> @@ -1383,7 +1384,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
>  		goto out;
>  	}
>  
> -	/* Move inodes from one superblock together */
> +	/*
> +	 * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue',
> +	 * we don't take inode->i_lock here because it is just a pointless overhead.
> +	 * Inode is already marked as I_SYNC_QUEUED so writeback list handling is
> +	 * fully under our control.
> +	 */
>  	while (!list_empty(&tmp)) {
>  		sb = wb_inode(tmp.prev)->i_sb;
>  		list_for_each_prev_safe(pos, node, &tmp) {
> @@ -1821,8 +1827,8 @@ static long writeback_sb_inodes(struct super_block *sb,
>  			 * We'll have another go at writing back this inode
>  			 * when we completed a full scan of b_io.
>  			 */
> -			spin_unlock(&inode->i_lock);
>  			requeue_io(inode, wb);
> +			spin_unlock(&inode->i_lock);
>  			trace_writeback_sb_inodes_requeue(inode);
>  			continue;
>  		}
> @@ -2351,6 +2357,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  {
>  	struct super_block *sb = inode->i_sb;
>  	int dirtytime = 0;
> +	struct bdi_writeback *wb = NULL;
>  
>  	trace_writeback_mark_inode_dirty(inode, flags);
>  
> @@ -2402,6 +2409,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  			inode->i_state &= ~I_DIRTY_TIME;
>  		inode->i_state |= flags;
>  
> +		/*
> +		 * Grab inode's wb early because it requires dropping i_lock and we
> +		 * need to make sure following checks happen atomically with dirty
> +		 * list handling so that we don't move inodes under flush worker's
> +		 * hands.
> +		 */
> +		if (!was_dirty) {
> +			wb = locked_inode_to_wb_and_lock_list(inode);
> +			spin_lock(&inode->i_lock);
> +		}
> +
>  		/*
>  		 * If the inode is queued for writeback by flush worker, just
>  		 * update its dirty state. Once the flush worker is done with
> @@ -2409,7 +2427,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  		 * list, based upon its state.
>  		 */
>  		if (inode->i_state & I_SYNC_QUEUED)
> -			goto out_unlock_inode;
> +			goto out_unlock;
>  
>  		/*
>  		 * Only add valid (hashed) inodes to the superblock's
> @@ -2417,22 +2435,19 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  		 */
>  		if (!S_ISBLK(inode->i_mode)) {
>  			if (inode_unhashed(inode))
> -				goto out_unlock_inode;
> +				goto out_unlock;
>  		}
>  		if (inode->i_state & I_FREEING)
> -			goto out_unlock_inode;
> +			goto out_unlock;
>  
>  		/*
>  		 * If the inode was already on b_dirty/b_io/b_more_io, don't
>  		 * reposition it (that would break b_dirty time-ordering).
>  		 */
>  		if (!was_dirty) {
> -			struct bdi_writeback *wb;
>  			struct list_head *dirty_list;
>  			bool wakeup_bdi = false;
>  
> -			wb = locked_inode_to_wb_and_lock_list(inode);
> -
>  			inode->dirtied_when = jiffies;
>  			if (dirtytime)
>  				inode->dirtied_time_when = jiffies;
> @@ -2446,6 +2461,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  							       dirty_list);
>  
>  			spin_unlock(&wb->list_lock);
> +			spin_unlock(&inode->i_lock);
>  			trace_writeback_dirty_inode_enqueue(inode);
>  
>  			/*
> @@ -2460,6 +2476,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  			return;
>  		}
>  	}
> +out_unlock:
> +	if (wb)
> +		spin_unlock(&wb->list_lock);
>  out_unlock_inode:
>  	spin_unlock(&inode->i_lock);
>  }
> diff --git a/fs/inode.c b/fs/inode.c
> index 9d9b422504d1..bd4da9c5207e 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -27,7 +27,7 @@
>   * Inode locking rules:
>   *
>   * inode->i_lock protects:
> - *   inode->i_state, inode->i_hash, __iget()
> + *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
>   * Inode LRU list locks protect:
>   *   inode->i_sb->s_inode_lru, inode->i_lru
>   * inode->i_sb->s_inode_list_lock protects:
> -- 
> 2.17.1
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux