Re: [PATCH 3/3] virtio-blk: add support for zoned block devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/19/22 22:41, Pankaj Raghav wrote:
> Hi Dmitry,
> 
> On Sun, Sep 18, 2022 at 10:29:21PM -0400, Dmitry Fomichev wrote:
>> The zone-specific code in the patch is heavily influenced by NVMe ZNS
>> code in drivers/nvme/host/zns.c, but it is simpler because the proposed
>> virtio ZBD draft only covers the zoned device features that are
>> relevant to the zoned functionality provided by Linux block layer.
>>
> There is a parallel work going on to support non-po2 zone sizes in Linux
> block layer and drivers[1]. I don't see any reason why we shouldn't make
> the calculations generic here instead of putting the constraint on zone
> sectors to be po2 as the virtio spec also supports it.

That series is not upstream, so implementing against would not be the
correct approach, especially given that this would also impact qemu code.

> 
> I took a quick look, and changing the calculations from po2 specific to
> generic will not be in the hot path and can be trivially changed. I have
> suggested the changes inline to make the virtio blk driver zone size 
> agnostic. I haven't tested the changes but it is very
> similar to the changes I did in the drivers/nvme/host/zns.c in my patch
> series[2].
> 
> [1] https://lore.kernel.org/linux-block/20220912082204.51189-1-p.raghav@xxxxxxxxxxx/
> [2] https://lore.kernel.org/linux-block/20220912082204.51189-6-p.raghav@xxxxxxxxxxx/
> 
>> Co-developed-by: Stefan Hajnoczi <stefanha@xxxxxxxxx>
>> Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxx>
>> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@xxxxxxx>
>> ---
>>  drivers/block/virtio_blk.c      | 381 ++++++++++++++++++++++++++++++--
>>  include/uapi/linux/virtio_blk.h | 106 +++++++++
>>  2 files changed, 469 insertions(+), 18 deletions(-)
>>
> <snip>
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
>> +					  unsigned int nr_zones,
>> +					  unsigned int zone_sectors,
>> +					  size_t *buflen)
>> +{
>> +	struct request_queue *q = vblk->disk->queue;
>> +	size_t bufsize;
>> +	void *buf;
>> +
> -	nr_zones = min_t(unsigned int, nr_zones,
> -			 get_capacity(vblk->disk) >> ilog2(zone_sectors));
> 
> +	nr_zones = min_t(unsigned int, nr_zones,
> +			 div64_u64(get_capacity(vblk->disk), zone_sectors));
> 
>> +
>> +	bufsize = sizeof(struct virtio_blk_zone_report) +
>> +		nr_zones * sizeof(struct virtio_blk_zone_descriptor);
>> +	bufsize = min_t(size_t, bufsize,
>> +			queue_max_hw_sectors(q) << SECTOR_SHIFT);
>> +	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
>> +
>> +	while (bufsize >= sizeof(struct virtio_blk_zone_report)) {
>> +		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
>> +		if (buf) {
>> +			*buflen = bufsize;
>> +			return buf;
>> +		}
>> +		bufsize >>= 1;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static int virtblk_submit_zone_report(struct virtio_blk *vblk,
>> +				       char *report_buf, size_t report_len,
>> +				       sector_t sector)
>> +{
>> +	struct request_queue *q = vblk->disk->queue;
>> +	struct request *req;
>> +	struct virtblk_req *vbr;
>> +	int err;
>> +
>> +	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
>> +	if (IS_ERR(req))
>> +		return PTR_ERR(req);
>> +
>> +	vbr = blk_mq_rq_to_pdu(req);
>> +	vbr->in_hdr_len = sizeof(vbr->status);
>> +	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
>> +	vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);
>> +
>> +	err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL);
>> +	if (err)
>> +		goto out;
>> +
>> +	blk_execute_rq(req, false);
>> +	err = blk_status_to_errno(virtblk_result(vbr->status));
>> +out:
>> +	blk_mq_free_request(req);
>> +	return err;
>> +}
>> +
>> +static int virtblk_parse_zone(struct virtio_blk *vblk,
>> +			       struct virtio_blk_zone_descriptor *entry,
>> +			       unsigned int idx, unsigned int zone_sectors,
>> +			       report_zones_cb cb, void *data)
>> +{
>> +	struct blk_zone zone = { };
>> +
>> +	if (entry->z_type != VIRTIO_BLK_ZT_SWR &&
>> +	    entry->z_type != VIRTIO_BLK_ZT_SWP &&
>> +	    entry->z_type != VIRTIO_BLK_ZT_CONV) {
>> +		dev_err(&vblk->vdev->dev, "invalid zone type %#x\n",
>> +			entry->z_type);
>> +		return -EINVAL;
>> +	}
>> +
>> +	zone.type = entry->z_type;
>> +	zone.cond = entry->z_state;
>> +	zone.len = zone_sectors;
>> +	zone.capacity = le64_to_cpu(entry->z_cap);
>> +	zone.start = le64_to_cpu(entry->z_start);
>> +	if (zone.cond == BLK_ZONE_COND_FULL)
>> +		zone.wp = zone.start + zone.len;
>> +	else
>> +		zone.wp = le64_to_cpu(entry->z_wp);
>> +
>> +	return cb(&zone, idx, data);
>> +}
>> +
>> +static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
>> +				 unsigned int nr_zones, report_zones_cb cb,
>> +				 void *data)
>> +{
>> +	struct virtio_blk *vblk = disk->private_data;
>> +	struct virtio_blk_zone_report *report;
>> +	unsigned int zone_sectors = vblk->zone_sectors;
>> +	unsigned int nz, i;
>> +	int ret, zone_idx = 0;
>> +	size_t buflen;
> +	u64 remainder;
>> +
>> +	if (WARN_ON_ONCE(!vblk->zone_sectors))
>> +		return -EOPNOTSUPP;
>> +
>> +	report = virtblk_alloc_report_buffer(vblk, nr_zones,
>> +					     zone_sectors, &buflen);
>> +	if (!report)
>> +		return -ENOMEM;
>> +
> -	sector &= ~(zone_sectors - 1);
> 
> +	div64_u64_rem(sector, zone_sectors, &remainder);
> +	sector -= remainder;
>> +	while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {
>> +		memset(report, 0, buflen);
>> +
>> +		ret = virtblk_submit_zone_report(vblk, (char *)report,
>> +						 buflen, sector);
>> +		if (ret) {
>> +			if (ret > 0)
>> +				ret = -EIO;
>> +			goto out_free;
>> +		}
>> +
>> +		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
>> +		if (!nz)
>> +			break;
>> +
>> +		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
>> +			ret = virtblk_parse_zone(vblk, &report->zones[i],
>> +						 zone_idx, zone_sectors, cb, data);
>> +			if (ret)
>> +				goto out_free;
>> +			zone_idx++;
>> +		}
>> +
>> +		sector += zone_sectors * nz;
>> +	}
>> +
>> +	if (zone_idx > 0)
>> +		ret = zone_idx;
>> +	else
>> +		ret = -EINVAL;
>> +out_free:
>> +	kvfree(report);
>> +	return ret;
>> +}
>> +
> <snip>
>> +static int virtblk_probe_zoned_device(struct virtio_device *vdev,
>> +				       struct virtio_blk *vblk,
>> +				       struct request_queue *q)
>> +{
> <snip>
>> +	blk_queue_physical_block_size(q, le32_to_cpu(v));
>> +	blk_queue_io_min(q, le32_to_cpu(v));
>> +
>> +	dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v));
>> +
> -	/*
> -	 * virtio ZBD specification doesn't require zones to be a power of
> -	 * two sectors in size, but the code in this driver expects that.
> -	 */
> -	virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v);
> -	if (v == 0 || !is_power_of_2(v)) {
> -		dev_err(&vdev->dev,
> -			"zoned device with non power of two zone size %u\n", v);
> -		return -ENODEV;
> -	}
>> +
>> +	dev_dbg(&vdev->dev, "zone sectors = %u\n", le32_to_cpu(v));
>> +	vblk->zone_sectors = le32_to_cpu(v);
>> +
>> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
>> +		dev_warn(&vblk->vdev->dev,
>> +			 "ignoring negotiated F_DISCARD for zoned device\n");
>> +		blk_queue_max_discard_sectors(q, 0);
>> +	}
>> +
>> +	ret = blk_revalidate_disk_zones(vblk->disk, NULL);
>> +	if (!ret) {
>> +		virtio_cread(vdev, struct virtio_blk_config,
>> +			     zoned.max_append_sectors, &v);
>> +		if (!v) {
>> +			dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
>> +			return -ENODEV;
>> +		}
>> +		blk_queue_max_zone_append_sectors(q, le32_to_cpu(v));
>> +		dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v));
>> +
>> +	}
>> +
>> +	return ret;
>> +}
>> +
> 

-- 
Damien Le Moal
Western Digital Research




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux