Re: [PATCH v2] null_blk: add support for max open/active zone limit for zoned devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2020/08/27 22:50, Niklas Cassel wrote:
> Add support for user space to set a max open zone and a max active zone
> limit via configfs. By default, the default values are 0 == no limit.
> 
> Call the block layer API functions used for exposing the configured
> limits to sysfs.
> 
> Add accounting in null_blk_zoned so that these new limits are respected.
> Performing an operating that would exceed these limits results in a

Performing a write operation that would result in exceeding these...

> standard I/O error.
> 
> A max open zone limit exists in the ZBC standard.
> While null_blk_zoned is used to test the Zoned Block Device model in
> Linux, when it comes to differences between ZBC and ZNS, null_blk_zoned
> mostly follows ZBC.
> 
> Therefore, implement the manage open zone resources function from ZBC,
> but additionally add support for max active zones.
> This enables user space not only to test against a device with an open
> zone limit, but also to test against a device with an active zone limit.
> 
> Signed-off-by: Niklas Cassel <niklas.cassel@xxxxxxx>
> ---
> Changes since v1:
> -Fixed review comments by Damien Le Moal.
> 
>  drivers/block/null_blk.h       |   5 +
>  drivers/block/null_blk_main.c  |  16 +-
>  drivers/block/null_blk_zoned.c | 319 +++++++++++++++++++++++++++------
>  3 files changed, 282 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
> index daed4a9c34367..d2e7db43a52a7 100644
> --- a/drivers/block/null_blk.h
> +++ b/drivers/block/null_blk.h
> @@ -42,6 +42,9 @@ struct nullb_device {
>  	struct badblocks badblocks;
>  
>  	unsigned int nr_zones;
> +	unsigned int nr_zones_imp_open;
> +	unsigned int nr_zones_exp_open;
> +	unsigned int nr_zones_closed;
>  	struct blk_zone *zones;
>  	sector_t zone_size_sects;
>  
> @@ -51,6 +54,8 @@ struct nullb_device {
>  	unsigned long zone_size; /* zone size in MB if device is zoned */
>  	unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
>  	unsigned int zone_nr_conv; /* number of conventional zones */
> +	unsigned int zone_max_open; /* max number of open zones */
> +	unsigned int zone_max_active; /* max number of active zones */
>  	unsigned int submit_queues; /* number of submission queues */
>  	unsigned int home_node; /* home node for the device */
>  	unsigned int queue_mode; /* block interface */
> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
> index d74443a9c8fa2..53161a418611b 100644
> --- a/drivers/block/null_blk_main.c
> +++ b/drivers/block/null_blk_main.c
> @@ -208,6 +208,14 @@ static unsigned int g_zone_nr_conv;
>  module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
>  MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
>  
> +static unsigned int g_zone_max_open;
> +module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
> +MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
> +
> +static unsigned int g_zone_max_active;
> +module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
> +MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
> +
>  static struct nullb_device *null_alloc_dev(void);
>  static void null_free_dev(struct nullb_device *dev);
>  static void null_del_dev(struct nullb *nullb);
> @@ -347,6 +355,8 @@ NULLB_DEVICE_ATTR(zoned, bool, NULL);
>  NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
>  NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
>  NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
> +NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
> +NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
>  
>  static ssize_t nullb_device_power_show(struct config_item *item, char *page)
>  {
> @@ -464,6 +474,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
>  	&nullb_device_attr_zone_size,
>  	&nullb_device_attr_zone_capacity,
>  	&nullb_device_attr_zone_nr_conv,
> +	&nullb_device_attr_zone_max_open,
> +	&nullb_device_attr_zone_max_active,
>  	NULL,
>  };
>  
> @@ -517,7 +529,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
>  static ssize_t memb_group_features_show(struct config_item *item, char *page)
>  {
>  	return snprintf(page, PAGE_SIZE,
> -			"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
> +			"memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n");
>  }
>  
>  CONFIGFS_ATTR_RO(memb_group_, features);
> @@ -580,6 +592,8 @@ static struct nullb_device *null_alloc_dev(void)
>  	dev->zone_size = g_zone_size;
>  	dev->zone_capacity = g_zone_capacity;
>  	dev->zone_nr_conv = g_zone_nr_conv;
> +	dev->zone_max_open = g_zone_max_open;
> +	dev->zone_max_active = g_zone_max_active;
>  	return dev;
>  }
>  
> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
> index 3d25c9ad23831..e7e341e811fbf 100644
> --- a/drivers/block/null_blk_zoned.c
> +++ b/drivers/block/null_blk_zoned.c
> @@ -51,6 +51,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
>  			dev->zone_nr_conv);
>  	}
>  
> +	/* Max active zones has to be < nbr of seq zones in order to be enforceable */
> +	if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
> +		dev->zone_max_active = 0;
> +		pr_info("zone_max_active limit disabled, limit >= zone count\n");
> +	}
> +
> +	/* Max open zones has to be <= max active zones */
> +	if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
> +		dev->zone_max_open = dev->zone_max_active;
> +		pr_info("changed the maximum number of open zones to %u\n",
> +			dev->nr_zones);
> +	} else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
> +		dev->zone_max_open = 0;
> +		pr_info("zone_max_open limit disabled, limit >= zone count\n");
> +	}
> +
>  	for (i = 0; i <  dev->zone_nr_conv; i++) {
>  		struct blk_zone *zone = &dev->zones[i];
>  
> @@ -99,6 +115,8 @@ int null_register_zoned_dev(struct nullb *nullb)
>  	}
>  
>  	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
> +	blk_queue_max_open_zones(q, dev->zone_max_open);
> +	blk_queue_max_active_zones(q, dev->zone_max_active);
>  
>  	return 0;
>  }
> @@ -159,6 +177,103 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
>  	return (zone->wp - sector) << SECTOR_SHIFT;
>  }
>  
> +static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone)
> +{
> +	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +		return BLK_STS_IOERR;
> +
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_CLOSED:
> +		/* close operation on closed is not an error */
> +		return BLK_STS_OK;
> +	case BLK_ZONE_COND_IMP_OPEN:
> +		dev->nr_zones_imp_open--;
> +		break;
> +	case BLK_ZONE_COND_EXP_OPEN:
> +		dev->nr_zones_exp_open--;
> +		break;
> +	case BLK_ZONE_COND_EMPTY:
> +	case BLK_ZONE_COND_FULL:
> +	default:
> +		return BLK_STS_IOERR;
> +	}
> +
> +	if (zone->wp == zone->start) {
> +		zone->cond = BLK_ZONE_COND_EMPTY;
> +	} else {
> +		zone->cond = BLK_ZONE_COND_CLOSED;
> +		dev->nr_zones_closed++;
> +	}
> +
> +	return BLK_STS_OK;
> +}
> +
> +static void null_close_first_imp_zone(struct nullb_device *dev)
> +{
> +	unsigned int i;
> +
> +	for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
> +		if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) {
> +			null_close_zone(dev, &dev->zones[i]);
> +			return;
> +		}
> +	}
> +}
> +
> +static bool null_can_set_active(struct nullb_device *dev)
> +{
> +	if (!dev->zone_max_active)
> +		return true;
> +
> +	return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
> +	       dev->nr_zones_closed < dev->zone_max_active;
> +}
> +
> +static bool null_can_open(struct nullb_device *dev)
> +{
> +	if (!dev->zone_max_open)
> +		return true;
> +
> +	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
> +		return true;
> +
> +	if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
> +		null_close_first_imp_zone(dev);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +/*
> + * This function matches the manage open zone resources function in the ZBC standard,
> + * with the addition of max active zones support (added in the ZNS standard).
> + *
> + * The function determines if a zone can transition to implicit open or explicit open,
> + * while maintaining the max open zone (and max active zone) limit(s). It may close an
> + * implicit open zone in order to make additional zone resources available.
> + *
> + * ZBC states that an implicit open zone shall be closed only if there is not
> + * room within the open limit. However, with the addition of an active limit,
> + * it is not certain that closing an implicit open zone will allow a new zone
> + * to be opened, since we might already be at the active limit capacity.
> + */
> +static bool null_manage_zone_resources(struct nullb_device *dev, struct blk_zone *zone)

I still do not like the name. Since this returns a bool, what about
null_has_zone_resources()?

> +{
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_EMPTY:
> +		if (!null_can_set_active(dev))
> +			return false;
> +		fallthrough;
> +	case BLK_ZONE_COND_CLOSED:
> +		return null_can_open(dev);
> +	default:
> +		/* Should never be called for other states */
> +		WARN_ON(1);
> +		return false;
> +	}
> +}
> +
>  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>  				    unsigned int nr_sectors, bool append)
>  {
> @@ -177,43 +292,155 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>  		/* Cannot write to a full zone */
>  		return BLK_STS_IOERR;
>  	case BLK_ZONE_COND_EMPTY:
> +	case BLK_ZONE_COND_CLOSED:
> +		if (!null_manage_zone_resources(dev, zone))
> +			return BLK_STS_IOERR;
> +		break;
>  	case BLK_ZONE_COND_IMP_OPEN:
>  	case BLK_ZONE_COND_EXP_OPEN:
> +		break;
> +	default:
> +		/* Invalid zone condition */
> +		return BLK_STS_IOERR;
> +	}
> +
> +	/*
> +	 * Regular writes must be at the write pointer position.
> +	 * Zone append writes are automatically issued at the write
> +	 * pointer and the position returned using the request or BIO
> +	 * sector.
> +	 */
> +	if (append) {
> +		sector = zone->wp;
> +		if (cmd->bio)
> +			cmd->bio->bi_iter.bi_sector = sector;
> +		else
> +			cmd->rq->__sector = sector;
> +	} else if (sector != zone->wp) {
> +		return BLK_STS_IOERR;
> +	}
> +
> +	if (zone->wp + nr_sectors > zone->start + zone->capacity)
> +		return BLK_STS_IOERR;
> +
> +	if (zone->cond == BLK_ZONE_COND_CLOSED) {
> +		dev->nr_zones_closed--;
> +		dev->nr_zones_imp_open++;
> +	} else if (zone->cond == BLK_ZONE_COND_EMPTY) {
> +		dev->nr_zones_imp_open++;
> +	}
> +	if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
> +		zone->cond = BLK_ZONE_COND_IMP_OPEN;
> +
> +	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> +	if (ret != BLK_STS_OK)
> +		return ret;
> +
> +	zone->wp += nr_sectors;
> +	if (zone->wp == zone->start + zone->capacity) {
> +		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
> +			dev->nr_zones_exp_open--;
> +		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
> +			dev->nr_zones_imp_open--;
> +		zone->cond = BLK_ZONE_COND_FULL;
> +	}
> +	return BLK_STS_OK;
> +}
> +
> +static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
> +{
> +	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +		return BLK_STS_IOERR;
> +
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_EXP_OPEN:
> +		/* open operation on exp open is not an error */
> +		return BLK_STS_OK;
> +	case BLK_ZONE_COND_EMPTY:
> +		if (!null_manage_zone_resources(dev, zone))
> +			return BLK_STS_IOERR;
> +		break;
> +	case BLK_ZONE_COND_IMP_OPEN:
> +		dev->nr_zones_imp_open--;
> +		break;
>  	case BLK_ZONE_COND_CLOSED:
> -		/*
> -		 * Regular writes must be at the write pointer position.
> -		 * Zone append writes are automatically issued at the write
> -		 * pointer and the position returned using the request or BIO
> -		 * sector.
> -		 */
> -		if (append) {
> -			sector = zone->wp;
> -			if (cmd->bio)
> -				cmd->bio->bi_iter.bi_sector = sector;
> -			else
> -				cmd->rq->__sector = sector;
> -		} else if (sector != zone->wp) {
> +		if (!null_manage_zone_resources(dev, zone))
>  			return BLK_STS_IOERR;
> -		}
> +		dev->nr_zones_closed--;
> +		break;
> +	case BLK_ZONE_COND_FULL:
> +	default:
> +		return BLK_STS_IOERR;
> +	}
> +
> +	zone->cond = BLK_ZONE_COND_EXP_OPEN;
> +	dev->nr_zones_exp_open++;
>  
> -		if (zone->wp + nr_sectors > zone->start + zone->capacity)
> +	return BLK_STS_OK;
> +}
> +
> +static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
> +{
> +	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +		return BLK_STS_IOERR;
> +
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_FULL:
> +		/* finish operation on full is not an error */
> +		return BLK_STS_OK;
> +	case BLK_ZONE_COND_EMPTY:
> +		if (!null_manage_zone_resources(dev, zone))

OK. So you are hitting a fuzzy case here that is not actually well described in
the standards. That is, does finishing an empty zone necessarily imply a
temporary transition through imp open? You are assuming here that the answer is yes.
Personally, I would say that is not necessary, but I have no strong feeling either way.

>  			return BLK_STS_IOERR;
> +		break;
> +	case BLK_ZONE_COND_IMP_OPEN:
> +		dev->nr_zones_imp_open--;
> +		break;
> +	case BLK_ZONE_COND_EXP_OPEN:
> +		dev->nr_zones_exp_open--;
> +		break;
> +	case BLK_ZONE_COND_CLOSED:
> +		if (!null_manage_zone_resources(dev, zone))
> +			return BLK_STS_IOERR;
> +		dev->nr_zones_closed--;
> +		break;
> +	default:
> +		return BLK_STS_IOERR;
> +	}
>  
> -		if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
> -			zone->cond = BLK_ZONE_COND_IMP_OPEN;
> +	zone->cond = BLK_ZONE_COND_FULL;
> +	zone->wp = zone->start + zone->len;
>  
> -		ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> -		if (ret != BLK_STS_OK)
> -			return ret;
> +	return BLK_STS_OK;
> +}
> +
> +static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone)
> +{
> +	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> +		return BLK_STS_IOERR;
>  
> -		zone->wp += nr_sectors;
> -		if (zone->wp == zone->start + zone->capacity)
> -			zone->cond = BLK_ZONE_COND_FULL;
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_EMPTY:
> +		/* reset operation on empty is not an error */
>  		return BLK_STS_OK;
> +	case BLK_ZONE_COND_IMP_OPEN:
> +		dev->nr_zones_imp_open--;
> +		break;
> +	case BLK_ZONE_COND_EXP_OPEN:
> +		dev->nr_zones_exp_open--;
> +		break;
> +	case BLK_ZONE_COND_CLOSED:
> +		dev->nr_zones_closed--;
> +		break;
> +	case BLK_ZONE_COND_FULL:
> +		break;
>  	default:
> -		/* Invalid zone condition */
>  		return BLK_STS_IOERR;
>  	}
> +
> +	zone->cond = BLK_ZONE_COND_EMPTY;
> +	zone->wp = zone->start;
> +
> +	return BLK_STS_OK;
>  }
>  
>  static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
> @@ -222,56 +449,34 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
>  	struct nullb_device *dev = cmd->nq->dev;
>  	unsigned int zone_no = null_zone_no(dev, sector);
>  	struct blk_zone *zone = &dev->zones[zone_no];
> +	blk_status_t ret = BLK_STS_OK;
>  	size_t i;
>  
>  	switch (op) {
>  	case REQ_OP_ZONE_RESET_ALL:
> -		for (i = 0; i < dev->nr_zones; i++) {
> -			if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
> -				continue;
> -			zone[i].cond = BLK_ZONE_COND_EMPTY;
> -			zone[i].wp = zone[i].start;
> -		}
> +		for (i = dev->zone_nr_conv; i < dev->nr_zones; i++)
> +			null_reset_zone(dev, &dev->zones[i]);
>  		break;
>  	case REQ_OP_ZONE_RESET:
> -		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -			return BLK_STS_IOERR;
> -
> -		zone->cond = BLK_ZONE_COND_EMPTY;
> -		zone->wp = zone->start;
> +		ret = null_reset_zone(dev, zone);
>  		break;
>  	case REQ_OP_ZONE_OPEN:
> -		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -			return BLK_STS_IOERR;
> -		if (zone->cond == BLK_ZONE_COND_FULL)
> -			return BLK_STS_IOERR;
> -
> -		zone->cond = BLK_ZONE_COND_EXP_OPEN;
> +		ret = null_open_zone(dev, zone);
>  		break;
>  	case REQ_OP_ZONE_CLOSE:
> -		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -			return BLK_STS_IOERR;
> -		if (zone->cond == BLK_ZONE_COND_FULL)
> -			return BLK_STS_IOERR;
> -
> -		if (zone->wp == zone->start)
> -			zone->cond = BLK_ZONE_COND_EMPTY;
> -		else
> -			zone->cond = BLK_ZONE_COND_CLOSED;
> +		ret = null_close_zone(dev, zone);
>  		break;
>  	case REQ_OP_ZONE_FINISH:
> -		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> -			return BLK_STS_IOERR;
> -
> -		zone->cond = BLK_ZONE_COND_FULL;
> -		zone->wp = zone->start + zone->len;
> +		ret = null_finish_zone(dev, zone);
>  		break;
>  	default:
>  		return BLK_STS_NOTSUPP;
>  	}
>  
> -	trace_nullb_zone_op(cmd, zone_no, zone->cond);
> -	return BLK_STS_OK;
> +	if (ret == BLK_STS_OK)
> +		trace_nullb_zone_op(cmd, zone_no, zone->cond);
> +
> +	return ret;
>  }
>  
>  blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
> 

Apart from the commit message and function name nits above, this looks good to me.

Reviewed-by: Damien Le Moal <damien.lemoal@xxxxxxx>

-- 
Damien Le Moal
Western Digital Research




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux