Patch "ext4: fix race when reusing xattr blocks" has been added to the 4.19-stable tree

Sasha Levin <sashal@xxxxxxxxxx> · Mon, 9 Jan 2023 21:28:39 -0500

This is a note to let you know that I've just added the patch titled

    ext4: fix race when reusing xattr blocks

to the 4.19-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     ext4-fix-race-when-reusing-xattr-blocks.patch
and it can be found in the queue-4.19 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit b45508d4ada679f38795f3dd0a16d07df565cbcd
Author: Jan Kara <jack@xxxxxxx>
Date:   Tue Jul 12 12:54:24 2022 +0200

    ext4: fix race when reusing xattr blocks
    
    [ Upstream commit 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b ]
    
    When ext4_xattr_block_set() decides to remove xattr block the following
    race can happen:
    
    CPU1                                    CPU2
    ext4_xattr_block_set()                  ext4_xattr_release_block()
      new_bh = ext4_xattr_block_cache_find()
    
                                              lock_buffer(bh);
                                              ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                              if (ref == 1) {
                                                ...
                                                mb_cache_entry_delete();
                                                unlock_buffer(bh);
                                                ext4_free_blocks();
                                                  ...
                                                  ext4_forget(..., bh, ...);
                                                    jbd2_journal_revoke(..., bh);
    
      ext4_journal_get_write_access(..., new_bh, ...)
        do_get_write_access()
          jbd2_journal_cancel_revoke(..., new_bh);
    
    Later the code in ext4_xattr_block_set() finds out the block got freed
    and cancels reusal of the block but the revoke stays canceled and so in
    case of block reuse and journal replay the filesystem can get corrupted.
    If the race works out slightly differently, we can also hit assertions
    in the jbd2 code.
    
    Fix the problem by making sure that once matching mbcache entry is
    found, code dropping the last xattr block reference (or trying to modify
    xattr block in place) waits until the mbcache entry reference is
    dropped. This way code trying to reuse xattr block is protected from
    someone trying to drop the last reference to xattr block.
    
    Reported-and-tested-by: Ritesh Harjani <ritesh.list@xxxxxxxxx>
    CC: stable@xxxxxxxxxxxxxxx
    Fixes: 82939d7999df ("ext4: convert to mbcache2")
    Signed-off-by: Jan Kara <jack@xxxxxxx>
    Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@xxxxxxx
    Signed-off-by: Theodore Ts'o <tytso@xxxxxxx>
    Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c23d7d68d7a1..f759ab8c1a68 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -437,9 +437,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
 /* Remove entry from mbcache when EA inode is getting evicted */
 void ext4_evict_ea_inode(struct inode *inode)
 {
-	if (EA_INODE_CACHE(inode))
-		mb_cache_entry_delete(EA_INODE_CACHE(inode),
-			ext4_xattr_inode_get_hash(inode), inode->i_ino);
+	struct mb_cache_entry *oe;
+
+	if (!EA_INODE_CACHE(inode))
+		return;
+	/* Wait for entry to get unused so that we can remove it */
+	while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+			ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+		mb_cache_entry_wait_unused(oe);
+		mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+	}
 }
 
 static int
@@ -1245,6 +1252,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 	if (error)
 		goto out;
 
+retry_ref:
 	lock_buffer(bh);
 	hash = le32_to_cpu(BHDR(bh)->h_hash);
 	ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1254,9 +1262,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		 * This must happen under buffer lock for
 		 * ext4_xattr_block_set() to reliably detect freed block
 		 */
-		if (ea_block_cache)
-			mb_cache_entry_delete(ea_block_cache, hash,
-					      bh->b_blocknr);
+		if (ea_block_cache) {
+			struct mb_cache_entry *oe;
+
+			oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+							  bh->b_blocknr);
+			if (oe) {
+				unlock_buffer(bh);
+				mb_cache_entry_wait_unused(oe);
+				mb_cache_entry_put(ea_block_cache, oe);
+				goto retry_ref;
+			}
+		}
 		get_bh(bh);
 		unlock_buffer(bh);
 
@@ -1883,9 +1900,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			 * ext4_xattr_block_set() to reliably detect modified
 			 * block
 			 */
-			if (ea_block_cache)
-				mb_cache_entry_delete(ea_block_cache, hash,
-						      bs->bh->b_blocknr);
+			if (ea_block_cache) {
+				struct mb_cache_entry *oe;
+
+				oe = mb_cache_entry_delete_or_get(ea_block_cache,
+					hash, bs->bh->b_blocknr);
+				if (oe) {
+					/*
+					 * Xattr block is getting reused. Leave
+					 * it alone.
+					 */
+					mb_cache_entry_put(ea_block_cache, oe);
+					goto clone_block;
+				}
+			}
 			ea_bdebug(bs->bh, "modifying in-place");
 			error = ext4_xattr_set_entry(i, s, handle, inode,
 						     true /* is_block */);
@@ -1901,6 +1929,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				goto cleanup;
 			goto inserted;
 		}
+clone_block:
 		unlock_buffer(bs->bh);
 		ea_bdebug(bs->bh, "cloning");
 		s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -2006,18 +2035,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				lock_buffer(new_bh);
 				/*
 				 * We have to be careful about races with
-				 * freeing, rehashing or adding references to
-				 * xattr block. Once we hold buffer lock xattr
-				 * block's state is stable so we can check
-				 * whether the block got freed / rehashed or
-				 * not.  Since we unhash mbcache entry under
-				 * buffer lock when freeing / rehashing xattr
-				 * block, checking whether entry is still
-				 * hashed is reliable. Same rules hold for
-				 * e_reusable handling.
+				 * adding references to xattr block. Once we
+				 * hold buffer lock xattr block's state is
+				 * stable so we can check the additional
+				 * reference fits.
 				 */
-				if (hlist_bl_unhashed(&ce->e_hash_list) ||
-				    !ce->e_reusable) {
+				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+				if (ref > EXT4_XATTR_REFCOUNT_MAX) {
 					/*
 					 * Undo everything and check mbcache
 					 * again.
@@ -2032,9 +2056,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 					new_bh = NULL;
 					goto inserted;
 				}
-				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
 				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
-				if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+				if (ref == EXT4_XATTR_REFCOUNT_MAX)
 					ce->e_reusable = 0;
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					  ref);