On 10/11/20 7:26 pm, Naohiro Aota wrote:
The zone append write command has a maximum I/O size it accepts. This is
because a zone append write cannot be split: we ask the device to place
the data into a specific target zone, and the device responds with the
actual location the data was written to.
Introduce max_zone_append_size in zone_info and fs_info to track this
value, so that all I/O we want to write to a zoned block device with the
zone append command can be limited to the device's limit.
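
For illustration, this is how callers are expected to use the new value:
cap the length of any I/O that will be issued as a zone append. This is a
minimal sketch only; the helper name below is made up for this example and
is not part of the patch.

/* Clamp a write length to the filesystem-wide zone append limit. */
static u64 clamp_to_zone_append_limit(struct btrfs_fs_info *fs_info, u64 len)
{
	/* Non-zoned filesystems, or devices reporting no limit, are unaffected. */
	if (!btrfs_is_zoned(fs_info) || !fs_info->max_zone_append_size)
		return len;

	return min(len, fs_info->max_zone_append_size);
}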
Reviewed-by: Josef Bacik <josef@xxxxxxxxxxxxxx>
Signed-off-by: Naohiro Aota <naohiro.aota@xxxxxxx>
---
Looks good, except for one question: what happens when we replace a
device, or add a new zoned device, with a different
queue_max_zone_append_sectors(queue) value?
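
To make the question concrete, a device add or replace would presumably
need the filesystem-wide value recomputed as the minimum over all
per-device limits, roughly along these lines (a sketch only; the helper
name and the omitted device_list_mutex handling are assumptions, not code
from this series):

static void recalc_max_zone_append_size(struct btrfs_fs_info *fs_info)
{
	struct btrfs_device *device;
	u64 max_size = 0;

	/* Take the smallest non-zero limit reported by any zoned device. */
	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
		struct btrfs_zoned_device_info *zone_info = device->zone_info;

		if (!zone_info || !zone_info->max_zone_append_size)
			continue;
		if (!max_size || zone_info->max_zone_append_size < max_size)
			max_size = zone_info->max_zone_append_size;
	}

	fs_info->max_zone_append_size = max_size;
}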
Nit: IMHO some parts of patches 4, 5, and 6 could have been folded into
one patch. It's fine as is now, since they are already at v10 and carry
Reviewed-by tags.
Thanks, Anand
fs/btrfs/ctree.h | 3 +++
fs/btrfs/zoned.c | 17 +++++++++++++++--
fs/btrfs/zoned.h | 1 +
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 453f41ca024e..c70d3fcc62c2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -954,6 +954,9 @@ struct btrfs_fs_info {
u64 zoned;
};
+ /* Max size to emit ZONE_APPEND write command */
+ u64 max_zone_append_size;
+
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1223d5b0e411..2897432eb43c 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -48,6 +48,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
{
struct btrfs_zoned_device_info *zone_info = NULL;
struct block_device *bdev = device->bdev;
+ struct request_queue *queue = bdev_get_queue(bdev);
sector_t nr_sectors = bdev->bd_part->nr_sects;
sector_t sector = 0;
struct blk_zone *zones = NULL;
@@ -69,6 +70,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
ASSERT(is_power_of_2(zone_sectors));
zone_info->zone_size = (u64)zone_sectors << SECTOR_SHIFT;
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+ zone_info->max_zone_append_size =
+ (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
zone_info->nr_zones = nr_sectors >> ilog2(bdev_zone_sectors(bdev));
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
@@ -188,6 +191,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
u64 zoned_devices = 0;
u64 nr_devices = 0;
u64 zone_size = 0;
+ u64 max_zone_append_size = 0;
const bool incompat_zoned = btrfs_is_zoned(fs_info);
int ret = 0;
@@ -201,10 +205,13 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
model = bdev_zoned_model(device->bdev);
if (model == BLK_ZONED_HM ||
(model == BLK_ZONED_HA && incompat_zoned)) {
+ struct btrfs_zoned_device_info *zone_info =
+ device->zone_info;
+
zoned_devices++;
if (!zone_size) {
- zone_size = device->zone_info->zone_size;
- } else if (device->zone_info->zone_size != zone_size) {
+ zone_size = zone_info->zone_size;
+ } else if (zone_info->zone_size != zone_size) {
btrfs_err(fs_info,
"zoned: unequal block device zone sizes: have %llu found %llu",
device->zone_info->zone_size,
@@ -212,6 +219,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
ret = -EINVAL;
goto out;
}
+ if (!max_zone_append_size ||
+ (zone_info->max_zone_append_size &&
+ zone_info->max_zone_append_size < max_zone_append_size))
+ max_zone_append_size =
+ zone_info->max_zone_append_size;
}
nr_devices++;
}
@@ -255,6 +267,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
}
fs_info->zone_size = zone_size;
+ fs_info->max_zone_append_size = max_zone_append_size;
btrfs_info(fs_info, "zoned mode enabled with zone size %llu",
fs_info->zone_size);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index bcb1cb99a4f3..52aa6af5d8dc 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -13,6 +13,7 @@ struct btrfs_zoned_device_info {
*/
u64 zone_size;
u8 zone_size_shift;
+ u64 max_zone_append_size;
u32 nr_zones;
unsigned long *seq_zones;
unsigned long *empty_zones;