Re: [PATCH 01/11] block: improve handling of all zones reset operation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2021/05/19 18:36, Christoph Hellwig wrote:
> The logic looks fine to me, but this makes blkdev_zone_mgmt extremely
> convoluted.  What do you think of the version below that splits out
> two self contained helpers instead?

Yep, that works for me. Will use this in v2.

> 
> ---
> From 3ff31f2502cf032ac1331122c9f1a018b769b577 Mon Sep 17 00:00:00 2001
> From: Damien Le Moal <damien.lemoal@xxxxxxx>
> Date: Wed, 19 May 2021 11:55:19 +0900
> Subject: block: improve handling of all zones reset operation
> 
> SCSI, ZNS and null_blk zoned devices support resetting all zones using
> a single command (REQ_OP_ZONE_RESET_ALL), as indicated using the device
> request queue flag QUEUE_FLAG_ZONE_RESETALL. This flag is not set for
> device mapper targets creating zoned devices. In this case, a user
> request for resetting all zones of a device is processed in
> blkdev_zone_mgmt() by issuing a REQ_OP_ZONE_RESET operation for each
> zone of the device. This leads to different behaviors of the
> BLKRESETZONE ioctl() depending on the target device support for the
> reset all operation. E.g.
> 
> blkzone reset /dev/sdX
> 
> will reset all zones of a SCSI device using a single command that will
> ignore conventional, read-only or offline zones.
> 
> But a dm-linear device including conventional, read-only or offline
> zones cannot be reset in the same manner as some of the single zone
> reset operations issued by blkdev_zone_mgmt() will fail. E.g.:
> 
> blkzone reset /dev/dm-Y
> blkzone: /dev/dm-0: BLKRESETZONE ioctl failed: Remote I/O error
> 
> To simplify applications and tools development, unify the behavior of
> an all-zone reset operation by modifying blkdev_zone_mgmt() to not
> issue a zone reset operation for conventional, read-only and offline
> zones, thus mimicking what an actual reset-all device command does on a
> device supporting REQ_OP_ZONE_RESET_ALL. The zones needing a reset are
> identified using a bitmap that is initialized using a zone report.
> Since empty zones do not need a reset, also ignore these zones.
> 
> Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxx>
> [hch: split into multiple functions]
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  block/blk-zoned.c | 115 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 88 insertions(+), 27 deletions(-)
> 
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 250cb76ee615..48e8376c1db8 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -161,18 +161,85 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>  }
>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>  
> -static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
> -						sector_t sector,
> -						sector_t nr_sectors)
> +static inline unsigned long *blk_alloc_zone_bitmap(int node,
> +						   unsigned int nr_zones)
>  {
> -	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
> -		return false;
> +	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
> +			    GFP_NOIO, node);
> +}
>  
> +static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
> +				  void *data)
> +{
>  	/*
> -	 * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
> -	 * of the applicable zone range is the entire disk.
> +	 * For an all-zones reset, ignore conventional, empty, read-only
> +	 * and offline zones.
>  	 */
> -	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_NOT_WP:
> +	case BLK_ZONE_COND_EMPTY:
> +	case BLK_ZONE_COND_READONLY:
> +	case BLK_ZONE_COND_OFFLINE:
> +		return 0;
> +	default:
> +		set_bit(idx, (unsigned long *)data);
> +		return 0;
> +	}
> +}
> +
> +static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
> +		     gfp_t gfp_mask)
> +{
> +	struct request_queue *q = bdev_get_queue(bdev);
> +	sector_t capacity = get_capacity(bdev->bd_disk);
> +	sector_t zone_sectors = blk_queue_zone_sectors(q);
> +	unsigned long *need_reset;
> +	sector_t sector;
> +	struct bio *bio = NULL;
> +	int ret;
> +
> +	need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
> +	if (!need_reset)
> +		return -ENOMEM;
> +	ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
> +				q->nr_zones, blk_zone_need_reset_cb,
> +				need_reset);
> +	if (ret < 0)
> +		goto out_free_need_reset;
> +
> +	ret = 0;
> +	while (sector < capacity) {
> +		if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) {
> +			sector += zone_sectors;
> +			continue;
> +		}
> +		bio = blk_next_bio(bio, 0, gfp_mask);
> +		bio_set_dev(bio, bdev);
> +		bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
> +		bio->bi_iter.bi_sector = sector;
> +		sector += zone_sectors;
> +
> +		/* This may take a while, so be nice to others */
> +		cond_resched();
> +	}
> +
> +	if (bio) {
> +		ret = submit_bio_wait(bio);
> +		bio_put(bio);
> +	}
> +out_free_need_reset:
> +	kfree(need_reset);
> +	return ret;
> +}
> +
> +static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
> +{
> +	struct bio bio;
> +
> +	bio_init(&bio, NULL, 0);
> +	bio_set_dev(&bio, bdev);
> +	bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
> +	return submit_bio_wait(&bio);
>  }
>  
>  /**
> @@ -200,7 +267,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
>  	sector_t capacity = get_capacity(bdev->bd_disk);
>  	sector_t end_sector = sector + nr_sectors;
>  	struct bio *bio = NULL;
> -	int ret;
> +	int ret = 0;
>  
>  	if (!blk_queue_is_zoned(q))
>  		return -EOPNOTSUPP;
> @@ -222,20 +289,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
>  	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
>  		return -EINVAL;
>  
> +	/*
> +	 * In the case of a zone reset operation over all zones,
> +	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
> +	 * command. For other devices, we emulate this command behavior by
> +	 * identifying the zones needing a reset.
> +	 */
> +	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
> +		if (!blk_queue_zone_resetall(q))
> +			return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
> +		return blkdev_zone_reset_all(bdev, gfp_mask);
> +	}
> +
>  	while (sector < end_sector) {
>  		bio = blk_next_bio(bio, 0, gfp_mask);
>  		bio_set_dev(bio, bdev);
> -
> -		/*
> -		 * Special case for the zone reset operation that reset all
> -		 * zones, this is useful for applications like mkfs.
> -		 */
> -		if (op == REQ_OP_ZONE_RESET &&
> -		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
> -			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
> -			break;
> -		}
> -
>  		bio->bi_opf = op | REQ_SYNC;
>  		bio->bi_iter.bi_sector = sector;
>  		sector += zone_sectors;
> @@ -396,13 +464,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>  	return ret;
>  }
>  
> -static inline unsigned long *blk_alloc_zone_bitmap(int node,
> -						   unsigned int nr_zones)
> -{
> -	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
> -			    GFP_NOIO, node);
> -}
> -
>  void blk_queue_free_zone_bitmaps(struct request_queue *q)
>  {
>  	kfree(q->conv_zones_bitmap);
> 


-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/dm-devel




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux