From: Naohiro Aota <naohiro.aota@xxxxxxx> [ Upstream commit 5f0addf7b89085f8e0a2593faa419d6111612b9b ] Currently, we use btrfs_inode_{lock,unlock}() to grant an exclusive writeback of the relocation data inode in btrfs_zoned_data_reloc_{lock,unlock}(). However, that can cause a deadlock in the following path. Thread A takes btrfs_inode_lock() and waits for metadata reservation by e.g, waiting for writeback: prealloc_file_extent_cluster() - btrfs_inode_lock(&inode->vfs_inode, 0); - btrfs_prealloc_file_range() ... - btrfs_replace_file_extents() - btrfs_start_transaction ... - btrfs_reserve_metadata_bytes() Thread B (e.g, doing a writeback work) needs to wait for the inode lock to continue writeback process: do_writepages - btrfs_writepages - extent_writpages - btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); - btrfs_inode_lock() The deadlock is caused by relying on the vfs_inode's lock. By using it, we introduced unnecessary exclusion of writeback and btrfs_prealloc_file_range(). Also, the lock at this point is useless as we don't have any dirty pages in the inode yet. Introduce fs_info->zoned_data_reloc_io_lock and use it for the exclusive writeback. Fixes: 35156d852762 ("btrfs: zoned: only allow one process to add pages to a relocation inode") CC: stable@xxxxxxxxxxxxxxx # 5.16.x: 869f4cdc73f9: btrfs: zoned: encapsulate inode locking for zoned relocation CC: stable@xxxxxxxxxxxxxxx # 5.16.x CC: stable@xxxxxxxxxxxxxxx # 5.17 Cc: Johannes Thumshirn <johannes.thumshirn@xxxxxxx> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx> Signed-off-by: Naohiro Aota <naohiro.aota@xxxxxxx> Signed-off-by: David Sterba <dsterba@xxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/zoned.h | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cc72d8981c47..d1838de0b39c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1027,6 +1027,7 @@ struct btrfs_fs_info { */ spinlock_t relocation_bg_lock; u64 data_reloc_bg; + struct mutex zoned_data_reloc_io_lock; #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 233d894f6feb..909d19656316 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2914,6 +2914,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); mutex_init(&fs_info->zoned_meta_io_lock); + mutex_init(&fs_info->zoned_data_reloc_io_lock); seqlock_init(&fs_info->profiles_lock); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index d680c3ee918a..3a826f7c2040 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -330,7 +330,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode) struct btrfs_root *root = inode->root; if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) - btrfs_inode_lock(&inode->vfs_inode, 0); + mutex_lock(&root->fs_info->zoned_data_reloc_io_lock); } static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) @@ -338,7 +338,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode) struct btrfs_root *root = inode->root; if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info)) - btrfs_inode_unlock(&inode->vfs_inode, 0); + mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock); } #endif -- 2.35.1