On or after a degraded mount, we might have no writable metadata block group due to broken write pointers. If you e.g. balance the FS before writing any data, alloc_tree_block_no_bg_flush() (called from insert_balance_item()) fails to allocate a tree block for it, due to global reservation failure. We can reproduce this situation with xfstests btrfs/124. While we can work around the failure by writing some data and, as a result of that write, getting a new metadata block group allocated, relying on that is bad practice. This commit avoids such failures by ensuring that a read-write mounted volume has non-zero metadata space. If the metadata space is empty, it forces allocation of a new metadata block group. Signed-off-by: Naohiro Aota <naohiro.aota@xxxxxxx> --- fs/btrfs/disk-io.c | 9 +++++++++ fs/btrfs/hmzoned.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/hmzoned.h | 6 ++++++ 3 files changed, 60 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index deca9fd70771..7f4c6a92079a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3246,6 +3246,15 @@ int __cold open_ctree(struct super_block *sb, } } + ret = btrfs_hmzoned_check_metadata_space(fs_info); + if (ret) { + btrfs_warn(fs_info, "failed to allocate metadata space: %d", + ret); + btrfs_warn(fs_info, "try remount with readonly"); + close_ctree(fs_info); + return ret; + } + down_read(&fs_info->cleanup_work_sem); if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { diff --git a/fs/btrfs/hmzoned.c b/fs/btrfs/hmzoned.c index 83dc2dc22323..4fd96fd43897 100644 --- a/fs/btrfs/hmzoned.c +++ b/fs/btrfs/hmzoned.c @@ -16,6 +16,8 @@ #include "disk-io.h" #include "block-group.h" #include "locking.h" +#include "space-info.h" +#include "transaction.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 @@ -1075,3 +1077,46 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache) return ret; } 
+ +/* + * On or after a degraded mount, we might have no writable metadata block + * group due to broken write pointers. If you e.g. balance the FS + * before writing any data, alloc_tree_block_no_bg_flush() (called + * from insert_balance_item()) fails to allocate a tree block for + * it. To avoid that, allocate a new metadata chunk when no space is left. + */ +int btrfs_hmzoned_check_metadata_space(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_trans_handle *trans; + struct btrfs_space_info *info; + u64 left; + int ret; + + if (!btrfs_fs_incompat(fs_info, HMZONED)) + return 0; + + info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); + spin_lock(&info->lock); + left = info->total_bytes - btrfs_space_info_used(info, true); + spin_unlock(&info->lock); + + if (left) + return 0; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + mutex_lock(&fs_info->chunk_mutex); + ret = btrfs_alloc_chunk(trans, btrfs_metadata_alloc_profile(fs_info)); + if (ret) { + mutex_unlock(&fs_info->chunk_mutex); + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + return ret; + } + mutex_unlock(&fs_info->chunk_mutex); + + return btrfs_commit_transaction(trans); +} diff --git a/fs/btrfs/hmzoned.h b/fs/btrfs/hmzoned.h index 4ed985d027cc..8ac758074afd 100644 --- a/fs/btrfs/hmzoned.h +++ b/fs/btrfs/hmzoned.h @@ -42,6 +42,7 @@ bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos, u64 num_bytes); void btrfs_calc_zone_unusable(struct btrfs_block_group *cache); int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache); +int btrfs_hmzoned_check_metadata_space(struct btrfs_fs_info *fs_info); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -97,6 +98,11 @@ static inline int btrfs_load_block_group_zone_info( { return 0; } +static inline int 
btrfs_hmzoned_check_metadata_space( + struct btrfs_fs_info *fs_info) +{ + return 0; +} #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) -- 2.24.0