Re: [PATCH 11/11] dm-zoned: metadata version 2

Hannes Reinecke <hare@xxxxxxx> · Tue, 14 Apr 2020 07:46:56 +0200

On 4/13/20 6:55 AM, Damien Le Moal wrote:
On 2020/04/09 15:45, Hannes Reinecke wrote:
Implement handling for metadata version 2. The new metadata adds
a label and UUID for the device mapper device, and additional UUID
for the underlying block devices.
It also allows for an additional regular drive to be used for
emulating random access zones. The emulated zones will be placed
logically in front of the zones from the zoned block device, causing
the superblocks and metadata to be stored on that device.
The first zone of the original zoned device will be used to hold
another, tertiary copy of the metadata; this copy carries a
generation number of 0 and is never updated; it's just used
for identification.

Signed-off-by: Hannes Reinecke <hare@xxxxxxx>
---
  drivers/md/dm-zoned-metadata.c | 277 +++++++++++++++++++++++++++------
  drivers/md/dm-zoned-target.c   | 126 ++++++++-------
  drivers/md/dm-zoned.h          |   7 +-
  3 files changed, 306 insertions(+), 104 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 554ff32abada..36a71f50165d 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -16,7 +16,7 @@
  /*
   * Metadata version.
   */
-#define DMZ_META_VER	1
+#define DMZ_META_VER	2
  
  /*
   * On-disk super block magic.
@@ -69,8 +69,17 @@ struct dmz_super {
  	/* Checksum */
  	__le32		crc;			/*  48 */
  
+	/* DM-Zoned label */
+	u8		dmz_label[32];		/*  80 */
+
+	/* DM-Zoned UUID */
+	u8		dmz_uuid[16];		/*  96 */
+
+	/* Device UUID */
+	u8		dev_uuid[16];		/* 112 */
+
  	/* Padding to full 512B sector */
-	u8		reserved[464];		/* 512 */
+	u8		reserved[400];		/* 512 */
  };
  
  /*
@@ -135,6 +144,8 @@ struct dmz_metadata {
  	struct dmz_dev		*dev;
  
  	char			devname[BDEVNAME_SIZE];
+	char			label[BDEVNAME_SIZE];
+	uuid_t			uuid;
  
  	sector_t		zone_bitmap_size;
  	unsigned int		zone_nr_bitmap_blocks;
@@ -161,8 +172,9 @@ struct dmz_metadata {
  	/* Zone information array */
  	struct dm_zone		*zones;
  
-	struct dmz_sb		sb[2];
+	struct dmz_sb		sb[3];
  	unsigned int		mblk_primary;
+	unsigned int		sb_version;
  	u64			sb_gen;
  	unsigned int		min_nr_mblks;
  	unsigned int		max_nr_mblks;
@@ -195,31 +207,56 @@ struct dmz_metadata {
  };
  
  #define dmz_zmd_info(zmd, format, args...)	\
-	DMINFO("(%s): " format, (zmd)->devname, ## args)
+	DMINFO("(%s): " format, (zmd)->label, ## args)
  
  #define dmz_zmd_err(zmd, format, args...)	\
-	DMERR("(%s): " format, (zmd)->devname, ## args)
+	DMERR("(%s): " format, (zmd)->label, ## args)
  
  #define dmz_zmd_warn(zmd, format, args...)	\
-	DMWARN("(%s): " format, (zmd)->devname, ## args)
+	DMWARN("(%s): " format, (zmd)->label, ## args)
  
  #define dmz_zmd_debug(zmd, format, args...)	\
-	DMDEBUG("(%s): " format, (zmd)->devname, ## args)
+	DMDEBUG("(%s): " format, (zmd)->label, ## args)
  /*
   * Various accessors
   */
+unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone)
+{
+	unsigned int zone_id;
+
+	if (WARN_ON(!zone))
+		return 0;
+
+	zone_id = zone->id;
+	if (zmd->dev[0].zone_offset &&

The name zone_offset is maybe a little confusing as one could think it is a
sector offset. May be call that zone_id_offset ?


Yeah, right. I'm not utterly happy with the naming myself.

+	    (zone_id >= zmd->dev[0].zone_offset))
+		zone_id -= zmd->dev[0].zone_offset;
+	return zone_id;
+}
+
  sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
  {
-	return (sector_t)zone->id << zmd->zone_nr_sectors_shift;
+	unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
+
+	return (sector_t)zone_id << zmd->zone_nr_sectors_shift;
  }
  
  sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
  {
-	return (sector_t)zone->id << zmd->zone_nr_blocks_shift;
+	unsigned int zone_id = dmz_dev_zone_id(zmd, zone);
+
+	return (sector_t)zone_id << zmd->zone_nr_blocks_shift;
  }
  
  struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone)
  {
+	if (WARN_ON(!zone))
+		return &zmd->dev[0];

return NULL here so that we get EIO (or other error) from caller ? Otherwise,
the bug here could go unnoticed and bad IOs go to dev[0].

Ok.

+
+	if (zmd->dev[0].zone_offset &&
+	    zone->id < zmd->dev[0].zone_offset)
+			return &zmd->dev[1];
+
  	return &zmd->dev[0];
  }

OK. This one still confuses me. I think we need to have a comment here reminding
that when there are 2 devices, the second one holds the low ID zones and the
first one (the SMR drive) the high ID zones. While I think it is OK as is (with
the comment), I still think we should reverse that as the reverse would be more
natural...

Sure, we can reverse the order of devices by using the regular disk 
device as first and the SMR drive as second argument when creating the 
device-mapper device:

0 <size> zoned <regular device> <zoned device>

That would make it more natural, and we don't have to rearrange disks 
within the code.

We'd lose compability with v1, but one could argue it's not a bad thing.

[ .. ]

@@ -693,60 +697,47 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
  /*
   * Get zoned device information.
   */
-static int dmz_get_zoned_device(struct dm_target *ti, char *path)
+static int dmz_get_zoned_device(struct dm_target *ti, char *path,
+				struct dmz_dev *dev, int num)
  {
  	struct dmz_target *dmz = ti->private;
  	struct request_queue *q;
-	struct dmz_dev *dev;
-	sector_t aligned_capacity;
  	int ret;
  
  	/* Get the target device */
-	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev);
+	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table),
+			    &dmz->ddev[num]);
  	if (ret) {
  		ti->error = "Get target device failed";
-		dmz->ddev = NULL;
+		dmz->ddev[num] = NULL;
  		return ret;
  	}
  
-	dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL);
-	if (!dev) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	dev->bdev = dmz->ddev->bdev;
+	dev->bdev = dmz->ddev[num]->bdev;
  	(void)bdevname(dev->bdev, dev->name);
  
-	if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
-		ti->error = "Not a zoned block device";
-		ret = -EINVAL;
-		goto err;
-	}
+	if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
+		dev->flags = DMZ_BDEV_REGULAR;
  
  	q = bdev_get_queue(dev->bdev);
  	dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
-	aligned_capacity = dev->capacity &
-				~((sector_t)blk_queue_zone_sectors(q) - 1);
-	if (ti->begin ||
-	    ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
-		ti->error = "Partial mapping not supported";
-		ret = -EINVAL;
-		goto err;
+	if (ti->begin) {
+		ti->error = "Partial mapping is not supported";
+		dm_put_device(ti, dmz->ddev[num]);
+		dmz->ddev[num] = NULL;
+		return -EINVAL;
  	}
  
-	dev->zone_nr_sectors = blk_queue_zone_sectors(q);
-
-	dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);
-
-	dmz->dev = dev;
-
+	if (num == 1) {
+		dev->zone_nr_sectors = dmz->dev[0].zone_nr_sectors;
+		dev->nr_zones = dev->capacity / dev->zone_nr_sectors;
+		if (dev->capacity % dev->nr_zones)
+			dev->nr_zones++;

dev->nr_zones =
	(dev->capacity + dev->zone_nr_sectors - 1) / dev->zone_nr_sectors;

or use DIV_ROUND_UP() ?

OK.

And may be add a comment to remind (again) that znd->dev[1] is the normal disk
so we cannot use blk_queue_zone_sectors() and blkdev_nr_zones().

Or, better still, check if it's a regular device and drop the magic 'num 
== 1' comparison.

Thanks for the review, will be sending a v4.

Cheers,

Hannes
--
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@xxxxxxx                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel