Signed-off-by: Naohiro Aota <naohiro.aota@xxxxxxx>
---
fs/btrfs/block-group.c | 4 +-
fs/btrfs/zoned.c | 99 +++++++++++++++++++++++++++++++++++++++---
fs/btrfs/zoned.h | 4 +-
3 files changed, 98 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 0140fafedb6a..349b2a09bdf1 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1851,7 +1851,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
- ret = btrfs_load_block_group_zone_info(cache);
+ ret = btrfs_load_block_group_zone_info(cache, false);
if (ret) {
btrfs_err(info, "zoned: failed to load zone info of bg %llu",
cache->start);
@@ -2146,7 +2146,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
cache->needs_free_space = 1;
- ret = btrfs_load_block_group_zone_info(cache);
+ ret = btrfs_load_block_group_zone_info(cache, true);
if (ret) {
btrfs_put_block_group(cache);
return ret;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 22c0665ee816..ca7aef252d33 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -930,7 +930,68 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
return 0;
}
-int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
+/*
+ * Calculate an allocation pointer from the extent allocation information
+ * for a block group consist of conventional zones. It is pointed to the
+ * end of the last allocated extent in the block group as an allocation
+ * offset.
+ */
+static int calculate_alloc_pointer(struct btrfs_block_group *cache,
+ u64 *offset_ret)
+{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ int ret;
+ u64 length;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = cache->start + cache->length;
+ key.type = 0;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ /* We should not find the exact match */
+ if (!ret)
+ ret = -EUCLEAN;
+ if (ret < 0)
+ goto out;
+
+ ret = btrfs_previous_extent_item(root, path, cache->start);
+ if (ret) {
+ if (ret == 1) {
+ ret = 0;
+ *offset_ret = 0;
+ }
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
+ length = found_key.offset;
+ else
+ length = fs_info->nodesize;
+
+ if (!(found_key.objectid >= cache->start &&
+ found_key.objectid + length <= cache->start + cache->length)) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ *offset_ret = found_key.objectid + length - cache->start;
+ ret = 0;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
@@ -944,6 +1005,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
int i;
unsigned int nofs_flag;
u64 *alloc_offsets = NULL;
+ u64 last_alloc = 0;
u32 num_sequential = 0, num_conventional = 0;
if (!btrfs_is_zoned(fs_info))
@@ -1042,11 +1104,30 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
if (num_conventional > 0) {
/*
- * Since conventional zones do not have a write pointer, we
- * cannot determine alloc_offset from the pointer
+ * Avoid calling calculate_alloc_pointer() for new BG. It
+ * is no use for new BG. It must be always 0.
+ *
+ * Also, we have a lock chain of extent buffer lock ->
+ * chunk mutex. For new BG, this function is called from
+ * btrfs_make_block_group() which is already taking the
+ * chunk mutex. Thus, we cannot call
+ * calculate_alloc_pointer() which takes extent buffer
+ * locks to avoid deadlock.
*/
- ret = -EINVAL;
- goto out;
+ if (new) {
+ cache->alloc_offset = 0;
+ goto out;
+ }
+ ret = calculate_alloc_pointer(cache, &last_alloc);
+ if (ret || map->num_stripes == num_conventional) {
+ if (!ret)
+ cache->alloc_offset = last_alloc;
+ else
+ btrfs_err(fs_info,
+ "zoned: failed to determine allocation offset of bg %llu",
+ cache->start);
+ goto out;
+ }
}
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
@@ -1068,6 +1149,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
}
out:
+ /* An extent is allocated after the write pointer */
+ if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
+ btrfs_err(fs_info,
+ "zoned: got wrong write pointer in BG %llu: %llu > %llu",
+ logical, last_alloc, cache->alloc_offset);
+ ret = -EIO;
+ }
+
kfree(alloc_offsets);
free_extent_map(em);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 491b98c97f48..b53403ba0b10 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -41,7 +41,7 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
u64 length, u64 *bytes);
int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size);
-int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache);
+int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
@@ -119,7 +119,7 @@ static inline int btrfs_ensure_empty_zones(struct btrfs_device *device,
}
static inline int btrfs_load_block_group_zone_info(
- struct btrfs_block_group *cache)
+ struct btrfs_block_group *cache, bool new)
{
return 0;
}