[PATCH v7 13/13] dm: add non power of 2 zoned target

Only power of 2 (po2) zoned devices have been supported in Linux so far,
but support for non power of 2 (npo2) zoned devices has now been added
to the block layer.

Filesystems such as F2FS and btrfs support zoned devices under the
assumption of a po2 zone size. Before adding native npo2 support to
them, it was suggested to create a dm target that makes an npo2 zoned
device appear as a po2 device, so that these filesystems can initially
work on npo2 devices without any explicit changes.

The design of this target is simple: introduce an emulated gap between
the zone capacity of the underlying device and the po2 zone size exposed
by the target. All IOs are remapped from the target's layout to the
actual device location. For zone append operations, the bi_sector
returned on completion is remapped back from the device to the target's
layout.

Read IOs that fall entirely inside the "emulated" gap area return
zeroes, and all other IOs in that area result in an error. A read IO
that spans the zone capacity boundary is split at the boundary, while
all other IO operations spanning a zone capacity boundary result in an
error.
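
For example, sticking with the illustrative 96MiB/128MiB layout above, a
1MiB read that starts 512KiB before the zone capacity boundary is split
at the boundary: the first 512KiB is remapped to the device, while the
remaining 512KiB falls inside the emulated gap and is simply zero filled
and completed by the target.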

The target can be created as follows:
dmsetup create <label> --table '0 <size_sects> zoned-npo2 /dev/nvme<id>'
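
The resulting power of 2 zone layout can then be inspected with, for
example, util-linux's blkzone tool:
blkzone report /dev/mapper/<label>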

Signed-off-by: Pankaj Raghav <p.raghav@xxxxxxxxxxx>
Suggested-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx>
Suggested-by: Damien Le Moal <damien.lemoal@xxxxxxx>
Suggested-by: Hannes Reinecke <hare@xxxxxxx>
---
 drivers/md/Kconfig                |   9 +
 drivers/md/Makefile               |   2 +
 drivers/md/dm-zone.c              |   9 +
 drivers/md/dm-zoned-npo2-target.c | 268 ++++++++++++++++++++++++++++++
 4 files changed, 288 insertions(+)
 create mode 100644 drivers/md/dm-zoned-npo2-target.c

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 998a5cfdb..773314536 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -518,6 +518,15 @@ config DM_FLAKEY
 	help
 	 A target that intermittently fails I/O for debugging purposes.
 
+config DM_ZONED_NPO2
+	tristate "Zoned non power of 2 target"
+	depends on BLK_DEV_DM
+	depends on BLK_DEV_ZONED
+	help
+	A target that emulates a power of 2 zone size on top of a zoned
+	device with a non power of 2 zone size. This is done by introducing
+	gaps between the zone capacity and the power of 2 zone size.
+
 config DM_VERITY
 	tristate "Verity target support"
 	depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 0454b0885..2863a94a7 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -26,6 +26,7 @@ dm-era-y	+= dm-era-target.o
 dm-clone-y	+= dm-clone-target.o dm-clone-metadata.o
 dm-verity-y	+= dm-verity-target.o
 dm-zoned-y	+= dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
+dm-zoned-npo2-y       += dm-zoned-npo2-target.o
 
 md-mod-y	+= md.o md-bitmap.o
 raid456-y	+= raid5.o raid5-cache.o raid5-ppl.o
@@ -60,6 +61,7 @@ obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
 obj-$(CONFIG_DM_DELAY)		+= dm-delay.o
 obj-$(CONFIG_DM_DUST)		+= dm-dust.o
 obj-$(CONFIG_DM_FLAKEY)		+= dm-flakey.o
+obj-$(CONFIG_DM_ZONED_NPO2)	+= dm-zoned-npo2.o
 obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_QL)	+= dm-queue-length.o
 obj-$(CONFIG_DM_MULTIPATH_ST)	+= dm-service-time.o
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index af36d33f9..5efb31ba0 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -210,6 +210,11 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
 		}
 		md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);
 
+		if (q->limits.chunk_sectors != zone->len) {
+			blk_queue_chunk_sectors(q, zone->len);
+			q->nr_zones = blkdev_nr_zones(md->disk);
+		}
+
 		break;
 	default:
 		DMERR("Invalid zone type 0x%x at sectors %llu",
@@ -307,6 +312,9 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 	if (dm_table_supports_zone_append(t)) {
 		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
 		dm_cleanup_zoned_dev(md);
+
+		if (!is_power_of_2(blk_queue_zone_sectors(q)))
+			goto revalidate_zones;
 		return 0;
 	}
 
@@ -318,6 +326,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 	if (!get_capacity(md->disk))
 		return 0;
 
+revalidate_zones:
 	return dm_revalidate_zones(md, t);
 }
 
diff --git a/drivers/md/dm-zoned-npo2-target.c b/drivers/md/dm-zoned-npo2-target.c
new file mode 100644
index 000000000..c1373d3ea
--- /dev/null
+++ b/drivers/md/dm-zoned-npo2-target.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "zoned-npo2"
+
+struct dmz_npo2_target {
+	struct dm_dev *dev;
+	sector_t zsze;
+	sector_t zsze_po2;
+	sector_t zsze_diff;
+	u32 nr_zones;
+};
+
+enum dmz_npo2_io_cond {
+	DMZ_NPO2_IO_INSIDE_ZONE,
+	DMZ_NPO2_IO_ACROSS_ZONE,
+	DMZ_NPO2_IO_OUTSIDE_ZONE,
+};
+
+static inline u32 npo2_zone_no(struct dmz_npo2_target *dmh, sector_t sect)
+{
+	return div64_u64(sect, dmh->zsze);
+}
+
+static inline u32 po2_zone_no(struct dmz_npo2_target *dmh, sector_t sect)
+{
+	return sect >> ilog2(dmh->zsze_po2);
+}
+
+static inline sector_t target_to_device_sect(struct dmz_npo2_target *dmh,
+					     sector_t sect)
+{
+	u32 zone_idx = po2_zone_no(dmh, sect);
+
+	sect -= (zone_idx * dmh->zsze_diff);
+
+	return sect;
+}
+
+static inline sector_t device_to_target_sect(struct dmz_npo2_target *dmh,
+					     sector_t sect)
+{
+	u32 zone_idx = npo2_zone_no(dmh, sect);
+
+	sect += (zone_idx * dmh->zsze_diff);
+
+	return sect;
+}
+
+/*
+ * <dev-path>
+ * This target works on the complete zoned device. Partial mapping is not
+ * supported
+ */
+static int dmz_npo2_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct dmz_npo2_target *dmh = NULL;
+	int ret = 0;
+	sector_t zsze;
+	sector_t disk_size;
+
+	if (argc < 1)
+		return -EINVAL;
+
+	dmh = kmalloc(sizeof(*dmh), GFP_KERNEL);
+	if (!dmh)
+		return -ENOMEM;
+
+	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			    &dmh->dev);
+
+	zsze = blk_queue_zone_sectors(bdev_get_queue(dmh->dev->bdev));
+
+	disk_size = get_capacity(dmh->dev->bdev->bd_disk);
+
+	if (ti->len != disk_size || ti->begin) {
+		DMERR("%pg Partial mapping of the target not supported",
+		      dmh->dev->bdev);
+		return -EINVAL;
+	}
+
+	if (is_power_of_2(zsze)) {
+		DMERR("%pg zone size is power of 2", dmh->dev->bdev);
+		return -EINVAL;
+	}
+
+	dmh->zsze = zsze;
+	dmh->zsze_po2 = 1 << get_count_order_long(zsze);
+	dmh->zsze_diff = dmh->zsze_po2 - dmh->zsze;
+
+	ti->private = dmh;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
+	ti->num_secure_erase_bios = 1;
+	ti->num_write_zeroes_bios = 1;
+
+	dmh->nr_zones = npo2_zone_no(dmh, ti->len);
+	ti->len = dmh->zsze_po2 * dmh->nr_zones;
+
+	return 0;
+}
+
+static int dmz_npo2_report_zones_cb(struct blk_zone *zone, unsigned int idx,
+				    void *data)
+{
+	struct dm_report_zones_args *args = data;
+	struct dmz_npo2_target *dmh = args->tgt->private;
+
+	zone->start = device_to_target_sect(dmh, zone->start);
+	zone->wp = device_to_target_sect(dmh, zone->wp);
+	zone->len = dmh->zsze_po2;
+	args->next_sector = zone->start + zone->len;
+
+	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
+}
+
+static int dmz_npo2_report_zones(struct dm_target *ti,
+				 struct dm_report_zones_args *args,
+				 unsigned int nr_zones)
+{
+	struct dmz_npo2_target *dmh = ti->private;
+	int ret = 0;
+	sector_t sect = po2_zone_no(dmh, args->next_sector) * dmh->zsze;
+
+	ret = blkdev_report_zones(dmh->dev->bdev, sect, nr_zones,
+				  dmz_npo2_report_zones_cb, args);
+	if (ret < 0)
+		DMERR("report zones error");
+
+	return ret;
+}
+
+static int check_zone_boundary_violation(struct dmz_npo2_target *dmh,
+					 sector_t sect, sector_t size)
+{
+	u32 zone_idx = po2_zone_no(dmh, sect);
+	sector_t relative_sect = 0;
+
+	sect = target_to_device_sect(dmh, sect);
+	relative_sect = sect - (zone_idx * dmh->zsze);
+
+	if ((relative_sect + size) <= dmh->zsze)
+		return DMZ_NPO2_IO_INSIDE_ZONE;
+	else if (relative_sect >= dmh->zsze)
+		return DMZ_NPO2_IO_OUTSIDE_ZONE;
+
+	return DMZ_NPO2_IO_ACROSS_ZONE;
+}
+
+static void split_io_across_zone_boundary(struct dmz_npo2_target *dmh,
+					  struct bio *bio)
+{
+	sector_t sect = bio->bi_iter.bi_sector;
+	sector_t sects_from_zone_start;
+
+	sect = target_to_device_sect(dmh, sect);
+	div64_u64_rem(sect, dmh->zsze, &sects_from_zone_start);
+	dm_accept_partial_bio(bio, dmh->zsze - sects_from_zone_start);
+	bio->bi_iter.bi_sector = sect;
+}
+
+static int handle_zone_boundary_violation(struct dmz_npo2_target *dmh,
+					  struct bio *bio,
+					  enum dmz_npo2_io_cond cond)
+{
+	/* Read should return zeroed page */
+	if (bio_op(bio) == REQ_OP_READ) {
+		if (cond == DMZ_NPO2_IO_ACROSS_ZONE) {
+			split_io_across_zone_boundary(dmh, bio);
+			return DM_MAPIO_REMAPPED;
+		}
+		zero_fill_bio(bio);
+		bio_endio(bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+	return DM_MAPIO_KILL;
+}
+
+static int dmz_npo2_end_io(struct dm_target *ti, struct bio *bio,
+			   blk_status_t *error)
+{
+	struct dmz_npo2_target *dmh = ti->private;
+
+	if (bio->bi_status == BLK_STS_OK && bio_op(bio) == REQ_OP_ZONE_APPEND)
+		bio->bi_iter.bi_sector =
+			device_to_target_sect(dmh, bio->bi_iter.bi_sector);
+
+	return DM_ENDIO_DONE;
+}
+
+static int dmz_npo2_map(struct dm_target *ti, struct bio *bio)
+{
+	struct dmz_npo2_target *dmh = ti->private;
+	enum dmz_npo2_io_cond cond;
+
+	bio_set_dev(bio, dmh->dev->bdev);
+	if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
+		cond = check_zone_boundary_violation(dmh, bio->bi_iter.bi_sector,
+						     bio->bi_iter.bi_size >> SECTOR_SHIFT);
+
+		/*
+		 * Reads that start inside the emulated gap are zero filled.
+		 * Reads that span the zone capacity boundary are split at
+		 * the boundary so that only the part falling inside the gap
+		 * is zero filled. Any other IO touching the gap is failed.
+		 */
+		switch (cond) {
+		case DMZ_NPO2_IO_INSIDE_ZONE:
+			bio->bi_iter.bi_sector = target_to_device_sect(dmh,
+								       bio->bi_iter.bi_sector);
+			break;
+		case DMZ_NPO2_IO_ACROSS_ZONE:
+		case DMZ_NPO2_IO_OUTSIDE_ZONE:
+			return handle_zone_boundary_violation(dmh, bio, cond);
+		}
+	}
+	return DM_MAPIO_REMAPPED;
+}
+
+static int dmz_npo2_iterate_devices(struct dm_target *ti,
+				    iterate_devices_callout_fn fn, void *data)
+{
+	struct dmz_npo2_target *dmh = ti->private;
+	sector_t len = 0;
+
+	len = dmh->nr_zones * dmh->zsze;
+	return fn(ti, dmh->dev, 0, len, data);
+}
+
+static struct target_type dmz_npo2_target = {
+	.name = "zoned-npo2",
+	.version = { 1, 0, 0 },
+	.features = DM_TARGET_ZONED_HM,
+	.map = dmz_npo2_map,
+	.end_io = dmz_npo2_end_io,
+	.report_zones = dmz_npo2_report_zones,
+	.iterate_devices = dmz_npo2_iterate_devices,
+	.module = THIS_MODULE,
+	.ctr = dmz_npo2_ctr,
+};
+
+static int __init dmz_npo2_init(void)
+{
+	int r = dm_register_target(&dmz_npo2_target);
+
+	if (r < 0)
+		DMERR("register failed %d", r);
+
+	return r;
+}
+
+static void __exit dmz_npo2_exit(void)
+{
+	dm_unregister_target(&dmz_npo2_target);
+}
+
+/* Module hooks */
+module_init(dmz_npo2_init);
+module_exit(dmz_npo2_exit);
+
+MODULE_DESCRIPTION(DM_NAME " non power of 2 zoned target");
+MODULE_AUTHOR("Pankaj Raghav <p.raghav@xxxxxxxxxxx>");
+MODULE_LICENSE("GPL");
+
-- 
2.25.1
