This is a note to let you know that I've just added the patch titled writeback: Fix inode->i_io_list not be protected by inode->i_lock error to the 5.10-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: writeback-fix-inode-i_io_list-not-be-protected-by-in.patch and it can be found in the queue-5.10 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit a6d977e8d36035d60883d72a9a881a77520bfc03 Author: Jchao Sun <sunjunchao2870@xxxxxxxxx> Date: Tue May 24 08:05:40 2022 -0700 writeback: Fix inode->i_io_list not be protected by inode->i_lock error [ Upstream commit 10e14073107dd0b6d97d9516a02845a8e501c2c9 ] Commit b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock") made inode->i_io_list not only protected by wb->list_lock but also inode->i_lock, but inode_io_list_move_locked() was missed. Add lock there and also update comment describing things protected by inode->i_lock. This also fixes a race where __mark_inode_dirty() could move inode under flush worker's hands and thus sync(2) could miss writing some inodes. Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock") Link: https://lore.kernel.org/r/20220524150540.12552-1-sunjunchao2870@xxxxxxxxx CC: stable@xxxxxxxxxxxxxxx Signed-off-by: Jchao Sun <sunjunchao2870@xxxxxxxxx> Signed-off-by: Jan Kara <jack@xxxxxxx> Stable-dep-of: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 645e3f6ffe44..4c667662a4d9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode, struct list_head *head) { assert_spin_locked(&wb->list_lock); + assert_spin_locked(&inode->i_lock); list_move(&inode->i_io_list, head); @@ -1282,9 +1283,9 @@ static int move_expired_inodes(struct list_head *delaying_queue, inode = wb_inode(delaying_queue->prev); if (inode_dirtied_after(inode, dirtied_before)) break; + spin_lock(&inode->i_lock); list_move(&inode->i_io_list, &tmp); moved++; - spin_lock(&inode->i_lock); inode->i_state |= I_SYNC_QUEUED; spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) @@ -1300,7 +1301,12 @@ static int move_expired_inodes(struct list_head *delaying_queue, goto out; } - /* Move inodes from one superblock together */ + /* + * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue', + * we don't take inode->i_lock here because it is just a pointless overhead. + * Inode is already marked as I_SYNC_QUEUED so writeback list handling is + * fully under our control. + */ while (!list_empty(&tmp)) { sb = wb_inode(tmp.prev)->i_sb; list_for_each_prev_safe(pos, node, &tmp) { @@ -1726,8 +1732,8 @@ static long writeback_sb_inodes(struct super_block *sb, * We'll have another go at writing back this inode * when we completed a full scan of b_io. */ - spin_unlock(&inode->i_lock); requeue_io(inode, wb); + spin_unlock(&inode->i_lock); trace_writeback_sb_inodes_requeue(inode); continue; } @@ -2265,6 +2271,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; int dirtytime = 0; + struct bdi_writeback *wb = NULL; trace_writeback_mark_inode_dirty(inode, flags); @@ -2316,6 +2323,17 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode->i_state &= ~I_DIRTY_TIME; inode->i_state |= flags; + /* + * Grab inode's wb early because it requires dropping i_lock and we + * need to make sure following checks happen atomically with dirty + * list handling so that we don't move inodes under flush worker's + * hands. + */ + if (!was_dirty) { + wb = locked_inode_to_wb_and_lock_list(inode); + spin_lock(&inode->i_lock); + } + /* * If the inode is queued for writeback by flush worker, just * update its dirty state. Once the flush worker is done with @@ -2323,7 +2341,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * list, based upon its state. */ if (inode->i_state & I_SYNC_QUEUED) - goto out_unlock_inode; + goto out_unlock; /* * Only add valid (hashed) inodes to the superblock's @@ -2331,22 +2349,19 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (!S_ISBLK(inode->i_mode)) { if (inode_unhashed(inode)) - goto out_unlock_inode; + goto out_unlock; } if (inode->i_state & I_FREEING) - goto out_unlock_inode; + goto out_unlock; /* * If the inode was already on b_dirty/b_io/b_more_io, don't * reposition it (that would break b_dirty time-ordering). */ if (!was_dirty) { - struct bdi_writeback *wb; struct list_head *dirty_list; bool wakeup_bdi = false; - wb = locked_inode_to_wb_and_lock_list(inode); - inode->dirtied_when = jiffies; if (dirtytime) inode->dirtied_time_when = jiffies; @@ -2360,6 +2375,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) dirty_list); spin_unlock(&wb->list_lock); + spin_unlock(&inode->i_lock); trace_writeback_dirty_inode_enqueue(inode); /* @@ -2374,6 +2390,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) return; } } +out_unlock: + if (wb) + spin_unlock(&wb->list_lock); out_unlock_inode: spin_unlock(&inode->i_lock); } diff --git a/fs/inode.c b/fs/inode.c index 9f49e0bdc2f7..51726f2ad994 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -28,7 +28,7 @@ * Inode locking rules: * * inode->i_lock protects: - * inode->i_state, inode->i_hash, __iget() + * inode->i_state, inode->i_hash, __iget(), inode->i_io_list * Inode LRU list locks protect: * inode->i_sb->s_inode_lru, inode->i_lru * inode->i_sb->s_inode_list_lock protects: