On 2020/03/24 20:03, Bob Liu wrote: > Introduce a regular device for storing metadata and buffer write, zoned > device is used by default if no regular device was set by dmsetup. > > The corresponding dmsetup cmd is: > echo "0 $size zoned $regular_device $zoned_device" | dmsetup create $dm-zoned-name > > Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx> > --- > drivers/md/dm-zoned-target.c | 141 +++++++++++++++++++++++++------------------ > drivers/md/dm-zoned.h | 50 +++++++++++++-- > 2 files changed, 127 insertions(+), 64 deletions(-) > > diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c > index 28f4d00..cae4bfe 100644 > --- a/drivers/md/dm-zoned-target.c > +++ b/drivers/md/dm-zoned-target.c > @@ -35,38 +35,6 @@ struct dm_chunk_work { > }; > > /* > - * Target descriptor. > - */ > -struct dmz_target { > - struct dm_dev *ddev; > - > - unsigned long flags; > - > - /* Zoned block device information */ > - struct dmz_dev *zoned_dev; > - > - /* For metadata handling */ > - struct dmz_metadata *metadata; > - > - /* For reclaim */ > - struct dmz_reclaim *reclaim; > - > - /* For chunk work */ > - struct radix_tree_root chunk_rxtree; > - struct workqueue_struct *chunk_wq; > - struct mutex chunk_lock; > - > - /* For cloned BIOs to zones */ > - struct bio_set bio_set; > - > - /* For flush */ > - spinlock_t flush_lock; > - struct bio_list flush_list; > - struct delayed_work flush_work; > - struct workqueue_struct *flush_wq; > -}; I am not sure I understand why this needs to be moved from here into dm-zoned.h... > - > -/* > * Flush intervals (seconds). > */ > #define DMZ_FLUSH_PERIOD (10 * HZ) > @@ -679,7 +647,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) > /* > * Get zoned device information. > */ > -static int dmz_get_zoned_device(struct dm_target *ti, char *path) > +static int dmz_get_device(struct dm_target *ti, char *path, bool zoned) I do not think you need the zoned argument here. You can easily detect this using bdev_is_zoned() once you get the bdev. > { > struct dmz_target *dmz = ti->private; > struct request_queue *q; > @@ -688,11 +656,22 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) > int ret; > > /* Get the target device */ > - ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev); > - if (ret) { > - ti->error = "Get target device failed"; > - dmz->ddev = NULL; > - return ret; > + if (zoned) { > + ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), > + &dmz->ddev); > + if (ret) { > + ti->error = "Get target device failed"; > + dmz->ddev = NULL; > + return ret; > + } > + } else { > + ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), > + &dmz->regu_dm_dev); > + if (ret) { > + ti->error = "Get target device failed"; > + dmz->regu_dm_dev = NULL; > + return ret; > + } If you use a local variable ddev, you do not need to duplicate this hunk. All you need is: if (zoned) dmz->zddev = ddev; else dmz->cddev = ddev; > } > > dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL); > @@ -701,39 +680,61 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) > goto err; > } > > - dev->bdev = dmz->ddev->bdev; > - (void)bdevname(dev->bdev, dev->name); > - > - if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) { > - ti->error = "Not a zoned block device"; > - ret = -EINVAL; > - goto err; > + if (zoned) { > + dev->bdev = dmz->ddev->bdev; > + if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) { > + ti->error = "Not a zoned block device"; > + ret = -EINVAL; > + goto err; > + } > } > + else > + dev->bdev = dmz->regu_dm_dev->bdev; > + > + (void)bdevname(dev->bdev, dev->name); > + dev->target = dmz; > > q = bdev_get_queue(dev->bdev); > dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; > aligned_capacity = dev->capacity & > ~((sector_t)blk_queue_zone_sectors(q) - 1); > - if (ti->begin || > - ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) { > - ti->error = "Partial mapping not supported"; > - ret = -EINVAL; > - goto err; > - } > > - dev->zone_nr_sectors = blk_queue_zone_sectors(q); > - dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors); > + if (zoned) { > + if (ti->begin || ((ti->len != dev->capacity) && > + (ti->len != aligned_capacity))) { > + ti->error = "Partial mapping not supported"; > + ret = -EINVAL; > + goto err; > + } > + dev->zone_nr_sectors = blk_queue_zone_sectors(q); > + dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors); > + > + dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); > + dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks); > > - dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); > - dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks); > + dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk); > > - dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk); > + dmz->zoned_dev = dev; > + } else { > + /* Emulate regular device zone info by using the same zone size.*/ > + dev->zone_nr_sectors = dmz->zoned_dev->zone_nr_sectors; > + dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors); > > - dmz->zoned_dev = dev; > + dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); > + dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks); > + > + dev->nr_zones = (get_capacity(dev->bdev->bd_disk) >> > + ilog2(dev->zone_nr_sectors)); > + > + dmz->regu_dmz_dev = dev; > + } > > return 0; > err: > - dm_put_device(ti, dmz->ddev); > + if (zoned) > + dm_put_device(ti, dmz->ddev); > + else > + dm_put_device(ti, dmz->regu_dm_dev); A local ddev variable will avoid the need for the if/else here. > kfree(dev); > > return ret; > @@ -746,6 +747,12 @@ static void dmz_put_zoned_device(struct dm_target *ti) > { > struct dmz_target *dmz = ti->private; > > + if (dmz->regu_dm_dev) > + dm_put_device(ti, dmz->regu_dm_dev); > + if (dmz->regu_dmz_dev) { > + kfree(dmz->regu_dmz_dev); > + dmz->regu_dmz_dev = NULL; > + } > dm_put_device(ti, dmz->ddev); > kfree(dmz->zoned_dev); > dmz->zoned_dev = NULL; > @@ -761,7 +768,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) > int ret; > > /* Check arguments */ > - if (argc != 1) { > + if ((argc != 1) && (argc != 2)) { > ti->error = "Invalid argument count"; > return -EINVAL; > } > @@ -775,12 +782,25 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) > ti->private = dmz; > > /* Get the target zoned block device */ > - ret = dmz_get_zoned_device(ti, argv[0]); > + ret = dmz_get_device(ti, argv[0], 1); > if (ret) { > dmz->ddev = NULL; > goto err; > } > > + snprintf(dmz->name, BDEVNAME_SIZE, "%s", dmz->zoned_dev->name); > + dmz->nr_zones = dmz->zoned_dev->nr_zones; > + if (argc == 2) { > + ret = dmz_get_device(ti, argv[1], 0); > + if (ret) { > + dmz->regu_dm_dev = NULL; > + goto err; > + } > + snprintf(dmz->name, BDEVNAME_SIZE * 2, "%s:%s", > + dmz->zoned_dev->name, dmz->regu_dmz_dev->name); > + dmz->nr_zones += dmz->regu_dmz_dev->nr_zones; > + } > + > /* Initialize metadata */ > dev = dmz->zoned_dev; > ret = dmz_ctr_metadata(dev, &dmz->metadata); > @@ -962,6 +982,7 @@ static int dmz_iterate_devices(struct dm_target *ti, > struct dmz_dev *dev = dmz->zoned_dev; > sector_t capacity = dev->capacity & ~(dev->zone_nr_sectors - 1); > > + /* Todo: fn(dmz->regu_dm_dev) */ > return fn(ti, dmz->ddev, 0, capacity, data); > } > > diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h > index 5b5e493..a3535bc 100644 > --- a/drivers/md/dm-zoned.h > +++ b/drivers/md/dm-zoned.h > @@ -46,9 +46,51 @@ > #define dmz_bio_blocks(bio) dmz_sect2blk(bio_sectors(bio)) > > /* > + * Target descriptor. > + */ > +struct dmz_target { > + struct dm_dev *ddev; > + /* > + * Regular device for store metdata and buffer write, use zoned device > + * by default if no regular device was set. > + */ > + struct dm_dev *regu_dm_dev; rddev is shorter... > + struct dmz_dev *regu_dmz_dev; And rdev here ? Or "cdev" with the c standing for "cache" and "conventional (=not zoned)" at the same time. > + /* Total nr_zones. */ > + unsigned int nr_zones; > + char name[BDEVNAME_SIZE * 2]; I would define 2 fields rather than doubling the nbame length. The field names can follow the same pattern and zdev/cdev, you add zname and cname. Anyway, this string is already in dmz_dev, so why add it ? > + > + unsigned long flags; Flags are currently for target and backedn device. This needs to sorted out because there will be a need for per backend device (e.g. dying flag etc) flags, so this needs to be split, one flag field for each cache and zoned dev. > + > + /* Zoned block device information */ > + struct dmz_dev *zoned_dev; Similarly to regu_dmz_dev, it would be better to pair this one with struct dm_dev *ddev above and rename ddev field to zddev. And to simplify everything, you could move ddev to struct dmz_dev and add a flags field there. Then all you need in struct dmz_target is: struct dm_dev *cdev; struct dm_dev *zdev; > + > + /* For metadata handling */ > + struct dmz_metadata *metadata; > + > + /* For reclaim */ > + struct dmz_reclaim *reclaim; > + > + /* For chunk work */ > + struct radix_tree_root chunk_rxtree; > + struct workqueue_struct *chunk_wq; > + struct mutex chunk_lock; > + > + /* For cloned BIOs to zones */ > + struct bio_set bio_set; > + > + /* For flush */ > + spinlock_t flush_lock; > + struct bio_list flush_list; > + struct delayed_work flush_work; > + struct workqueue_struct *flush_wq; > +}; > + > +/* > * Zoned block device information. > */ > struct dmz_dev { > + struct dmz_target *target; > struct block_device *bdev; > > char name[BDEVNAME_SIZE]; > @@ -147,16 +189,16 @@ enum { > * Message functions. > */ > #define dmz_dev_info(dev, format, args...) \ > - DMINFO("(%s): " format, (dev)->name, ## args) > + DMINFO("(%s): " format, (dev)->target->name, ## args) > > #define dmz_dev_err(dev, format, args...) \ > - DMERR("(%s): " format, (dev)->name, ## args) > + DMERR("(%s): " format, (dev)->target->name, ## args) > > #define dmz_dev_warn(dev, format, args...) \ > - DMWARN("(%s): " format, (dev)->name, ## args) > + DMWARN("(%s): " format, (dev)->target->name, ## args) > > #define dmz_dev_debug(dev, format, args...) \ > - DMDEBUG("(%s): " format, (dev)->name, ## args) > + DMDEBUG("(%s): " format, (dev)->target->name, ## args) > > struct dmz_metadata; > struct dmz_reclaim; > -- Damien Le Moal Western Digital Research