Re: [PATCH] block: make maximum zone append size configurable

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2020/10/06 23:15, Johannes Thumshirn wrote:
> Martin rightfully noted that for normal filesystem IO we have soft limits
> in place to prevent requests from getting too big and leading to
> unpredictable latencies. For zone append we only have the hardware limit
> in place.
> 
> Add a soft limit to the maximal zone append size which is gated by the
> hardware's capabilities, so the user can control the IO size of zone
> append commands.
> 
> Reported-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx>
> ---
>  block/blk-settings.c           | 10 ++++++----
>  block/blk-sysfs.c              | 33 ++++++++++++++++++++++++++++++++-
>  drivers/block/null_blk_zoned.c |  2 +-
>  drivers/nvme/host/core.c       |  2 +-
>  drivers/scsi/sd_zbc.c          |  2 +-
>  include/linux/blkdev.h         |  3 ++-
>  6 files changed, 43 insertions(+), 9 deletions(-)
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 4f6eb4bb1723..e4ff7546dd82 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -222,11 +222,11 @@ void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
>  EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
>  
>  /**
> - * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
> + * blk_queue_max_hw_zone_append_sectors - set max sectors for a single zone append
>   * @q:  the request queue for the device
>   * @max_zone_append_sectors: maximum number of sectors to write per command
>   **/
> -void blk_queue_max_zone_append_sectors(struct request_queue *q,
> +void blk_queue_max_hw_zone_append_sectors(struct request_queue *q,
>  		unsigned int max_zone_append_sectors)
>  {
>  	unsigned int max_sectors;
> @@ -244,9 +244,11 @@ void blk_queue_max_zone_append_sectors(struct request_queue *q,
>  	 */
>  	WARN_ON(!max_sectors);
>  
> -	q->limits.max_zone_append_sectors = max_sectors;
> +	q->limits.max_hw_zone_append_sectors = max_sectors;
> +	q->limits.max_zone_append_sectors = min_not_zero(max_sectors,
> +							 q->limits.max_zone_append_sectors);
>  }
> -EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
> +EXPORT_SYMBOL_GPL(blk_queue_max_hw_zone_append_sectors);
>  
>  /**
>   * blk_queue_max_segments - set max hw segments for a request for this queue
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 76b54c7750b0..087b7e66e638 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -226,6 +226,35 @@ static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
>  	return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
>  }
>  
> +static ssize_t
> +queue_zone_append_max_store(struct request_queue *q, const char *page,
> +				    size_t count)
> +{
> +	unsigned long max_hw_sectors = q->limits.max_hw_zone_append_sectors;
> +	unsigned long max_sectors;
> +	ssize_t ret;
> +
> +	ret = queue_var_store(&max_sectors, page, count);
> +	if (ret < 0)
> +		return ret;
> +
> +	max_sectors >>= SECTOR_SHIFT;
> +	max_sectors = min_not_zero(max_sectors, max_hw_sectors);
> +
> +	spin_lock_irq(&q->queue_lock);
> +	q->limits.max_zone_append_sectors = max_sectors;
> +	spin_unlock_irq(&q->queue_lock);
> +
> +	return ret;
> +}

Hmmm. That is one more tunable knob, and one that the user/sysadmin may not
consider without knowing that the FS is actually using zone append. E.g. btrfs
does, f2fs does not. I was thinking of something simpler:

* Keep the soft limit zone_append_max_bytes/max_zone_append_sectors as RO
* Change its value when the generic soft limit max_sectors is changed.

Something like this:

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7dda709f3ccb..78817d7acb66 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -246,6 +246,11 @@ queue_max_sectors_store(struct request_queue *q, const char
*page, size_t count)
        spin_lock_irq(&q->queue_lock);
        q->limits.max_sectors = max_sectors_kb << 1;
        q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
+
+       q->limits.max_zone_append_sectors =
+               min(q->limits.max_sectors,
+                   q->limits.max_hw_zone_append_sectors);
+
        spin_unlock_irq(&q->queue_lock);

        return ret;

The reasoning is that zone appends are a variation of write commands, and since
max_sectors will gate the size of all read and write commands, it should also
gate the size of zone append writes. And that avoids adding yet another tuning
knob for users to get confused about.

Martin,

Thoughts ?



> +
> +static ssize_t queue_zone_append_max_hw_show(struct request_queue *q, char *page)
> +{
> +	unsigned long long max_sectors = q->limits.max_hw_zone_append_sectors;
> +
> +	return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
> +}
> +
>  static ssize_t
>  queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
>  {
> @@ -584,7 +613,8 @@ QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
>  
>  QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
>  QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
> -QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
> +QUEUE_RW_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
> +QUEUE_RO_ENTRY(queue_zone_append_max_hw, "zone_append_max_hw_bytes");
>  
>  QUEUE_RO_ENTRY(queue_zoned, "zoned");
>  QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
> @@ -639,6 +669,7 @@ static struct attribute *queue_attrs[] = {
>  	&queue_write_same_max_entry.attr,
>  	&queue_write_zeroes_max_entry.attr,
>  	&queue_zone_append_max_entry.attr,
> +	&queue_zone_append_max_hw_entry.attr,
>  	&queue_nonrot_entry.attr,
>  	&queue_zoned_entry.attr,
>  	&queue_nr_zones_entry.attr,
> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
> index 3d25c9ad2383..b1b08f2a09bd 100644
> --- a/drivers/block/null_blk_zoned.c
> +++ b/drivers/block/null_blk_zoned.c
> @@ -98,7 +98,7 @@ int null_register_zoned_dev(struct nullb *nullb)
>  		q->nr_zones = blkdev_nr_zones(nullb->disk);
>  	}
>  
> -	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
> +	blk_queue_max_hw_zone_append_sectors(q, dev->zone_size_sects);
>  
>  	return 0;
>  }
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index c190c56bf702..b2da0ab9d68a 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2217,7 +2217,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
>  
>  		ret = blk_revalidate_disk_zones(disk, NULL);
>  		if (!ret)
> -			blk_queue_max_zone_append_sectors(disk->queue,
> +			blk_queue_max_hw_zone_append_sectors(disk->queue,
>  							  ctrl->max_zone_append);
>  	}
>  #endif
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 0e94ff056bff..9412445d4efb 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -705,7 +705,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
>  			   q->limits.max_segments << (PAGE_SHIFT - 9));
>  	max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
>  
> -	blk_queue_max_zone_append_sectors(q, max_append);
> +	blk_queue_max_hw_zone_append_sectors(q, max_append);
>  
>  	sd_zbc_print_zones(sdkp);
>  
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index cf80e61b4c5e..e53ea384c15d 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -336,6 +336,7 @@ struct queue_limits {
>  	unsigned int		max_hw_discard_sectors;
>  	unsigned int		max_write_same_sectors;
>  	unsigned int		max_write_zeroes_sectors;
> +	unsigned int		max_hw_zone_append_sectors;
>  	unsigned int		max_zone_append_sectors;
>  	unsigned int		discard_granularity;
>  	unsigned int		discard_alignment;
> @@ -1141,7 +1142,7 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q,
>  extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
>  		unsigned int max_write_same_sectors);
>  extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
> -extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
> +extern void blk_queue_max_hw_zone_append_sectors(struct request_queue *q,
>  		unsigned int max_zone_append_sectors);
>  extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
>  extern void blk_queue_alignment_offset(struct request_queue *q,
> 


-- 
Damien Le Moal
Western Digital Research




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux