On 2021/05/19 18:36, Christoph Hellwig wrote: > The logic looks fine to me, but this makes blkdev_zone_mgmt extremely > convoluted. What do you think of the version below that splits out > two self contained helpers instead? Yep, that works for me. Will use this in v2. > > --- > From 3ff31f2502cf032ac1331122c9f1a018b769b577 Mon Sep 17 00:00:00 2001 > From: Damien Le Moal <damien.lemoal@xxxxxxx> > Date: Wed, 19 May 2021 11:55:19 +0900 > Subject: block: improve handling of all zones reset operation > > SCSI, ZNS and null_blk zoned devices support resetting all zones using > a single command (REQ_OP_ZONE_RESET_ALL), as indicated using the device > request queue flag QUEUE_FLAG_ZONE_RESETALL. This flag is not set for > device mapper targets creating zoned devices. In this case, a user > request for resetting all zones of a device is processed in > blkdev_zone_mgmt() by issuing a REQ_OP_ZONE_RESET operation for each > zone of the device. This leads to different behaviors of the > BLKRESETZONE ioctl() depending on the target device support for the > reset all operation. E.g. > > blkzone reset /dev/sdX > > will reset all zones of a SCSI device using a single command that will > ignore conventional, read-only or offline zones. > > But a dm-linear device including conventional, read-only or offline > zones cannot be reset in the same manner as some of the single zone > reset operations issued by blkdev_zone_mgmt() will fail. E.g.: > > blkzone reset /dev/dm-Y > blkzone: /dev/dm-0: BLKRESETZONE ioctl failed: Remote I/O error > > To simplify applications and tools development, unify the behavior of > an all-zone reset operation by modifying blkdev_zone_mgmt() to not > issue a zone reset operation for conventional, read-only and offline > zones, thus mimicking what an actual reset-all device command does on a > device supporting REQ_OP_ZONE_RESET_ALL. The zones needing a reset are > identified using a bitmap that is initialized using a zone report. > Since empty zones do not need a reset, also ignore these zones. > > Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxx> > [hch: split into multiple functions] > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > block/blk-zoned.c | 115 +++++++++++++++++++++++++++++++++++----------- > 1 file changed, 88 insertions(+), 27 deletions(-) > > diff --git a/block/blk-zoned.c b/block/blk-zoned.c > index 250cb76ee615..48e8376c1db8 100644 > --- a/block/blk-zoned.c > +++ b/block/blk-zoned.c > @@ -161,18 +161,85 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, > } > EXPORT_SYMBOL_GPL(blkdev_report_zones); > > -static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, > - sector_t sector, > - sector_t nr_sectors) > +static inline unsigned long *blk_alloc_zone_bitmap(int node, > + unsigned int nr_zones) > { > - if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) > - return false; > + return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), > + GFP_NOIO, node); > +} > > +static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, > + void *data) > +{ > /* > - * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors > - * of the applicable zone range is the entire disk. > + * For an all-zones reset, ignore conventional, empty, read-only > + * and offline zones. > */ > - return !sector && nr_sectors == get_capacity(bdev->bd_disk); > + switch (zone->cond) { > + case BLK_ZONE_COND_NOT_WP: > + case BLK_ZONE_COND_EMPTY: > + case BLK_ZONE_COND_READONLY: > + case BLK_ZONE_COND_OFFLINE: > + return 0; > + default: > + set_bit(idx, (unsigned long *)data); > + return 0; > + } > +} > + > +static int blkdev_zone_reset_all_emulated(struct block_device *bdev, > + gfp_t gfp_mask) > +{ > + struct request_queue *q = bdev_get_queue(bdev); > + sector_t capacity = get_capacity(bdev->bd_disk); > + sector_t zone_sectors = blk_queue_zone_sectors(q); > + unsigned long *need_reset; > + sector_t sector; > + struct bio *bio = NULL; > + int ret; > + > + need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones); > + if (!need_reset) > + return -ENOMEM; > + ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0, > + q->nr_zones, blk_zone_need_reset_cb, > + need_reset); > + if (ret < 0) > + goto out_free_need_reset; > + > + ret = 0; > + while (sector < capacity) { > + if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) { > + sector += zone_sectors; > + continue; > + } > + bio = blk_next_bio(bio, 0, gfp_mask); > + bio_set_dev(bio, bdev); > + bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC; > + bio->bi_iter.bi_sector = sector; > + sector += zone_sectors; > + > + /* This may take a while, so be nice to others */ > + cond_resched(); > + } > + > + if (bio) { > + ret = submit_bio_wait(bio); > + bio_put(bio); > + } > +out_free_need_reset: > + kfree(need_reset); > + return ret; > +} > + > +static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) > +{ > + struct bio bio; > + > + bio_init(&bio, NULL, 0); > + bio_set_dev(&bio, bdev); > + bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; > + return submit_bio_wait(&bio); > } > > /** > @@ -200,7 +267,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, > sector_t capacity = get_capacity(bdev->bd_disk); > sector_t end_sector = sector + nr_sectors; > struct bio *bio = NULL; > - int ret; > + int ret = 0; > > if (!blk_queue_is_zoned(q)) > return -EOPNOTSUPP; > @@ -222,20 +289,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, > if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity) > return -EINVAL; > > + /* > + * In the case of a zone reset operation over all zones, > + * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this > + * command. For other devices, we emulate this command behavior by > + * identifying the zones needing a reset. > + */ > + if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) { > + if (!blk_queue_zone_resetall(q)) > + return blkdev_zone_reset_all_emulated(bdev, gfp_mask); > + return blkdev_zone_reset_all(bdev, gfp_mask); > + } > + > while (sector < end_sector) { > bio = blk_next_bio(bio, 0, gfp_mask); > bio_set_dev(bio, bdev); > - > - /* > - * Special case for the zone reset operation that reset all > - * zones, this is useful for applications like mkfs. > - */ > - if (op == REQ_OP_ZONE_RESET && > - blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { > - bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; > - break; > - } > - > bio->bi_opf = op | REQ_SYNC; > bio->bi_iter.bi_sector = sector; > sector += zone_sectors; > @@ -396,13 +464,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, > return ret; > } > > -static inline unsigned long *blk_alloc_zone_bitmap(int node, > - unsigned int nr_zones) > -{ > - return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), > - GFP_NOIO, node); > -} > - > void blk_queue_free_zone_bitmaps(struct request_queue *q) > { > kfree(q->conv_zones_bitmap); > -- Damien Le Moal Western Digital Research