Superblocks for zoned devices are fixed as 2 zones at 0, 512GB and 4TB. These are fixed at these locations so that recovery tools can reliably retrieve the superblocks even if one of the mirror gets corrupted. power of 2 zone sizes align at these offsets irrespective of their value but non power of 2 zone sizes will not align. To make sure the first zone at mirror 1 and mirror 2 align, write zero operation is performed to move the write pointer of the first zone to the expected offset. This operation is performed only after a zone reset of the first zone, i.e., when the second zone that contains the sb is FULL. Signed-off-by: Pankaj Raghav <p.raghav@xxxxxxxxxxx> --- fs/btrfs/zoned.c | 68 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3023c871e..805aeaa76 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -760,11 +760,44 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info) return 0; } +static int fill_sb_wp_offset(struct block_device *bdev, struct blk_zone *zone, + int mirror, u64 *wp_ret) +{ + u64 offset = 0; + int ret = 0; + + ASSERT(!is_power_of_two_u64(zone->len)); + ASSERT(zone->wp == zone->start); + ASSERT(mirror != 0); + + switch (mirror) { + case 1: + div64_u64_rem(BTRFS_SB_LOG_FIRST_OFFSET >> SECTOR_SHIFT, + zone->len, &offset); + break; + case 2: + div64_u64_rem(BTRFS_SB_LOG_SECOND_OFFSET >> SECTOR_SHIFT, + zone->len, &offset); + break; + } + + ret = blkdev_issue_zeroout(bdev, zone->start, offset, GFP_NOFS, 0); + if (ret) + return ret; + + zone->wp += offset; + zone->cond = BLK_ZONE_COND_IMP_OPEN; + *wp_ret = zone->wp << SECTOR_SHIFT; + + return 0; +} + static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, - int rw, u64 *bytenr_ret) + int rw, int mirror, u64 *bytenr_ret) { u64 wp; int ret; + bool zones_empty = false; if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) { *bytenr_ret = zones[0].start << SECTOR_SHIFT; @@ -775,13 +808,31 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, if (ret != -ENOENT && ret < 0) return ret; + if (ret == -ENOENT) + zones_empty = true; + if (rw == WRITE) { struct blk_zone *reset = NULL; + bool is_sb_offset_write_req = false; + u32 reset_zone_nr = -1; - if (wp == zones[0].start << SECTOR_SHIFT) + if (wp == zones[0].start << SECTOR_SHIFT) { reset = &zones[0]; - else if (wp == zones[1].start << SECTOR_SHIFT) + reset_zone_nr = 0; + } else if (wp == zones[1].start << SECTOR_SHIFT) { reset = &zones[1]; + reset_zone_nr = 1; + } + + /* + * Non po2 zone sizes will not align naturally at + * mirror 1 (512GB) and mirror 2 (4TB). The wp of the + * 1st zone in those superblock mirrors need to be + * moved to align at those offsets. + */ + is_sb_offset_write_req = + (zones_empty || (reset_zone_nr == 0)) && mirror && + !is_power_of_2(zones[0].len); if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { ASSERT(sb_zone_is_full(reset)); @@ -795,6 +846,13 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, reset->cond = BLK_ZONE_COND_EMPTY; reset->wp = reset->start; } + + if (is_sb_offset_write_req) { + ret = fill_sb_wp_offset(bdev, &zones[0], mirror, &wp); + if (ret) + return ret; + } + } else if (ret != -ENOENT) { /* * For READ, we want the previous one. Move write pointer to @@ -851,7 +909,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, if (ret != BTRFS_NR_SB_LOG_ZONES) return -EIO; - return sb_log_location(bdev, zones, rw, bytenr_ret); + return sb_log_location(bdev, zones, rw, mirror, bytenr_ret); } int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw, @@ -877,7 +935,7 @@ int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw, return sb_log_location(device->bdev, &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], - rw, bytenr_ret); + rw, mirror, bytenr_ret); } static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo, -- 2.25.1 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel