Re: [PATCH 1/2] block: add max_open_zones to blk-sysfs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2020/06/30 10:49, Damien Le Moal wrote:
> On 2020/06/16 19:28, Niklas Cassel wrote:
>> Add a new max_open_zones definition in the sysfs documentation.
>> This definition will be common for all devices utilizing the zoned block
>> device support in the kernel.
>>
>> Export max open zones according to this new definition for NVMe Zoned
>> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
>> the kernel), and ZBC SCSI devices.
>>
>> Add the new max_open_zones struct member to the request_queue, rather
> 
> Add the new max_open_zones member to struct request_queue...
> 
>> than as a queue limit, since this property cannot be split across stacking
>> drivers.
> 
> But device-mapper target device have a request queue too and it looks like your
> patch is not setting any value, using the default 0 for dm-linear and dm-flakey.
> Attaching the new attribute directly to the request queue rather than adding it
> as part of the queue limits seems odd. Even if DM case is left unsupported
> (using the default 0 = no limit), it may be cleaner to add the field as part of
> the limit struct.
> 
> Adding the field as a device attribute rather than a queue limit, similarly to
> the device maximum queue depth would be another option. In such case, including
> the field directly as part of the request queue makes more sense.

Thinking more about this one, struct request_queue has nr_zones field, which is
not a queue limit but still exported as a queue attribute. Device mapper
exposing a zoned drive target do set this field manually. So I guess the same
approach is valid for max_open_zones (and max_active_zones). So OK, disregard
this comment.

The other comments I sent below remain though.

> 
>>
>> Signed-off-by: Niklas Cassel <niklas.cassel@xxxxxxx>
>> ---
>>  Documentation/block/queue-sysfs.rst |  7 +++++++
>>  block/blk-sysfs.c                   | 15 +++++++++++++++
>>  drivers/nvme/host/zns.c             |  1 +
>>  drivers/scsi/sd_zbc.c               |  4 ++++
>>  include/linux/blkdev.h              | 20 ++++++++++++++++++++
>>  5 files changed, 47 insertions(+)
>>
>> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
>> index 6a8513af9201..f01cf8530ae4 100644
>> --- a/Documentation/block/queue-sysfs.rst
>> +++ b/Documentation/block/queue-sysfs.rst
>> @@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
>>  data that will be submitted by the block layer core to the associated
>>  block driver.
>>  
>> +max_open_zones (RO)
>> +-------------------
>> +For zoned block devices (zoned attribute indicating "host-managed" or
>> +"host-aware"), the sum of zones belonging to any of the zone states:
>> +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
>> +If this value is 0, there is no limit.
>> +
>>  max_sectors_kb (RW)
>>  -------------------
>>  This is the maximum number of kilobytes that the block layer will allow
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 02643e149d5e..fa42961e9678 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
>>  	return queue_var_show(blk_queue_nr_zones(q), page);
>>  }
>>  
>> +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
>> +{
>> +	return queue_var_show(queue_max_open_zones(q), page);
>> +}
>> +
>>  static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
>>  {
>>  	return queue_var_show((blk_queue_nomerges(q) << 1) |
>> @@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
>>  	.show = queue_nr_zones_show,
>>  };
>>  
>> +static struct queue_sysfs_entry queue_max_open_zones_entry = {
>> +	.attr = {.name = "max_open_zones", .mode = 0444 },
>> +	.show = queue_max_open_zones_show,
>> +};
>> +
>>  static struct queue_sysfs_entry queue_nomerges_entry = {
>>  	.attr = {.name = "nomerges", .mode = 0644 },
>>  	.show = queue_nomerges_show,
>> @@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
>>  	&queue_nonrot_entry.attr,
>>  	&queue_zoned_entry.attr,
>>  	&queue_nr_zones_entry.attr,
>> +	&queue_max_open_zones_entry.attr,
>>  	&queue_nomerges_entry.attr,
>>  	&queue_rq_affinity_entry.attr,
>>  	&queue_iostats_entry.attr,
>> @@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
>>  		(!q->mq_ops || !q->mq_ops->timeout))
>>  			return 0;
>>  
>> +	if (attr == &queue_max_open_zones_entry.attr &&
>> +	    !blk_queue_is_zoned(q))
>> +		return 0;
>> +
>>  	return attr->mode;
>>  }
>>  
>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>> index c08f6281b614..af156529f3b6 100644
>> --- a/drivers/nvme/host/zns.c
>> +++ b/drivers/nvme/host/zns.c
>> @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  
>>  	q->limits.zoned = BLK_ZONED_HM;
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>> +	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
>>  free_data:
>>  	kfree(id);
>>  	return status;
>> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
>> index 183a20720da9..aa3564139b40 100644
>> --- a/drivers/scsi/sd_zbc.c
>> +++ b/drivers/scsi/sd_zbc.c
>> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>>  	/* The drive satisfies the kernel restrictions: set it up */
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
>> +	if (sdkp->zones_max_open == U32_MAX)
>> +		blk_queue_max_open_zones(q, 0);
>> +	else
>> +		blk_queue_max_open_zones(q, sdkp->zones_max_open);
> 
> This is correct only for host-managed drives. Host-aware models define the
> "OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
> number of open sequential write required zones.
> 
> Since the standard does not actually explicitly define what the value of the
> maximum number of open sequential write required zones should be for a
> host-aware drive, I would suggest to always have the max_open_zones value set to
> 0 for host-aware disks.
> 
>>  	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>>  
>>  	/* READ16/WRITE16 is mandatory for ZBC disks */
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 8fd900998b4e..2f332f00501d 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -520,6 +520,7 @@ struct request_queue {
>>  	unsigned int		nr_zones;
>>  	unsigned long		*conv_zones_bitmap;
>>  	unsigned long		*seq_zones_wlock;
>> +	unsigned int		max_open_zones;
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>  
>>  	/*
>> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
>>  		return true;
>>  	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
>>  }
>> +
>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>> +		unsigned int max_open_zones)
>> +{
>> +	q->max_open_zones = max_open_zones;
>> +}
>> +
>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>> +{
>> +	return q->max_open_zones;
>> +}
>>  #else /* CONFIG_BLK_DEV_ZONED */
>>  static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
>>  {
>> @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>>  {
>>  	return 0;
>>  }
>> +static inline void blk_queue_max_open_zones(struct request_queue *q,
>> +		unsigned int max_open_zones)
>> +{
>> +}
> 
> Why is this one necessary ? For the !CONFIG_BLK_DEV_ZONED case, no driver should
> ever call this function.
> 
>> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
>> +{
>> +	return 0;
>> +}
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>  
>>  static inline bool rq_is_sync(struct request *rq)
>>
> 
> 


-- 
Damien Le Moal
Western Digital Research




[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux