On 6/24/22 08:26, Bart Van Assche wrote: > Add a new configfs attribute for enabling pipelining of zoned writes. If > that attribute has been set, retry zoned writes that are not aligned with > the write pointer. The test script below reports 236 K IOPS with no I/O > scheduler, 81 K IOPS with mq-deadline and pipelining disabled and 121 K > IOPS with mq-deadline and pipelining enabled (+49%). > > #!/bin/bash > > for d in /sys/kernel/config/nullb/*; do > [ -d "$d" ] && rmdir "$d" > done > modprobe -r null_blk > set -e > modprobe null_blk nr_devices=0 > udevadm settle > ( > cd /sys/kernel/config/nullb > mkdir nullb0 > cd nullb0 > params=( > completion_nsec=100000 > hw_queue_depth=64 > irqmode=2 > memory_backed=1 > pipeline_zoned_writes=1 > size=1 > submit_queues=1 > zone_size=1 > zoned=1 > power=1 > ) > for p in "${params[@]}"; do > echo "${p//*=}" > "${p//=*}" > done > ) > params=( > --direct=1 > --filename=/dev/nullb0 > --iodepth=64 > --iodepth_batch=16 > --ioengine=io_uring > --ioscheduler=mq-deadline > --hipri=0 > --name=nullb0 > --runtime=30 > --rw=write > --time_based=1 > --zonemode=zbd > ) > fio "${params[@]}" > > Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx> > Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx> > --- > drivers/block/null_blk/main.c | 9 +++++++++ > drivers/block/null_blk/null_blk.h | 3 +++ > drivers/block/null_blk/zoned.c | 4 +++- > 3 files changed, 15 insertions(+), 1 deletion(-) > > diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c > index fd68e6f4637f..d5fc651ffc3d 100644 > --- a/drivers/block/null_blk/main.c > +++ b/drivers/block/null_blk/main.c > @@ -408,6 +408,7 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); > NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); > NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); > NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); > +NULLB_DEVICE_ATTR(pipeline_zoned_writes, bool, NULL); > NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); > > static ssize_t nullb_device_power_show(struct config_item *item, char *page) > @@ -531,6 +532,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { > &nullb_device_attr_zone_nr_conv, > &nullb_device_attr_zone_max_open, > &nullb_device_attr_zone_max_active, > + &nullb_device_attr_pipeline_zoned_writes, > &nullb_device_attr_virt_boundary, > NULL, > }; > @@ -1626,6 +1628,11 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, > cmd->error = BLK_STS_OK; > cmd->nq = nq; > cmd->fake_timeout = should_timeout_request(rq); > + if (!(rq->rq_flags & RQF_DONTPREP)) { > + rq->rq_flags |= RQF_DONTPREP; > + cmd->retries = 0; > + cmd->max_attempts = rq->q->nr_hw_queues * rq->q->nr_requests; > + } > > blk_mq_start_request(rq); > > @@ -2042,6 +2049,8 @@ static int null_add_dev(struct nullb_device *dev) > nullb->q->queuedata = nullb; > blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); > blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); > + if (dev->pipeline_zoned_writes) > + blk_queue_flag_set(QUEUE_FLAG_PIPELINE_ZONED_WRITES, nullb->q); > > mutex_lock(&lock); > nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); > diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h > index 8359b43842f2..bbe2cb17bdbd 100644 > --- a/drivers/block/null_blk/null_blk.h > +++ b/drivers/block/null_blk/null_blk.h > @@ -23,6 +23,8 @@ struct nullb_cmd { > unsigned int tag; > blk_status_t error; > bool fake_timeout; > + u16 retries; > + u16 max_attempts; > struct nullb_queue *nq; > struct hrtimer timer; > }; > @@ -112,6 +114,7 @@ struct nullb_device { > bool memory_backed; /* if data is stored in memory */ > bool discard; /* if support discard */ > bool zoned; /* if device is zoned */ > + bool pipeline_zoned_writes; > bool virt_boundary; /* virtual boundary on/off for the device */ > }; > > diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c > index 2fdd7b20c224..8d0a5e16f4b1 100644 > --- a/drivers/block/null_blk/zoned.c > +++ b/drivers/block/null_blk/zoned.c > @@ -403,7 +403,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, > else > cmd->bio->bi_iter.bi_sector = sector; > } else if (sector != zone->wp) { > - ret = BLK_STS_IOERR; > + ret = dev->pipeline_zoned_writes && > + ++cmd->retries < cmd->max_attempts ? > + BLK_STS_DEV_RESOURCE : BLK_STS_IOERR; Hard to read. Could you change this to a plain "if()" ? if (dev->pipeline_zoned_writes && ++cmd->retries < cmd->max_attempts) ret = BLK_STS_DEV_RESOURCE; else ret = BLK_STS_IOERR; Adding a comment on top of this hunk explaining the difference between the 2 cases would be nice too. > goto unlock; > } > -- Damien Le Moal Western Digital Research