On 6/9/21 1:07 AM, Bart Van Assche wrote: > For interactive workloads it is important that synchronous requests are > not delayed. Hence reserve 25% of scheduler tags for synchronous requests. > This patch still allows asynchronous requests to fill the hardware queues > since blk_mq_init_sched() makes sure that the number of scheduler requests > is the double of the hardware queue depth. From blk_mq_init_sched(): > > q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, > BLKDEV_MAX_RQ); > > Cc: Damien Le Moal <damien.lemoal@xxxxxxx> > Cc: Hannes Reinecke <hare@xxxxxxx> > Cc: Christoph Hellwig <hch@xxxxxx> > Cc: Ming Lei <ming.lei@xxxxxxxxxx> > Cc: Johannes Thumshirn <johannes.thumshirn@xxxxxxx> > Cc: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> > Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx> > --- > block/mq-deadline.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 52 insertions(+) > > diff --git a/block/mq-deadline.c b/block/mq-deadline.c > index 1d1bb7a41d2a..a7d0584437d1 100644 > --- a/block/mq-deadline.c > +++ b/block/mq-deadline.c > @@ -67,6 +67,7 @@ struct deadline_data { > int fifo_batch; > int writes_starved; > int front_merges; > + u32 async_depth; > > spinlock_t lock; > spinlock_t zone_lock; > @@ -397,6 +398,44 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) > return rq; > } > > +/* > + * Called by __blk_mq_alloc_request(). The shallow_depth value set by this > + * function is used by __blk_mq_get_tag(). > + */ > +static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) > +{ > + struct deadline_data *dd = data->q->elevator->elevator_data; > + > + /* Do not throttle synchronous reads. */ > + if (op_is_sync(op) && !op_is_write(op)) > + return; > + > + /* > + * Throttle asynchronous requests and writes such that these requests > + * do not block the allocation of synchronous requests. 
> + */ > + data->shallow_depth = dd->async_depth; > +} > + > +/* Called by blk_mq_update_nr_requests(). */ > +static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) > +{ > + struct request_queue *q = hctx->queue; > + struct deadline_data *dd = q->elevator->elevator_data; > + struct blk_mq_tags *tags = hctx->sched_tags; > + > + dd->async_depth = max(1UL, 3 * q->nr_requests / 4); > + > + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth); > +} > + > +/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */ > +static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) > +{ > + dd_depth_updated(hctx); > + return 0; > +} > + > static void dd_exit_sched(struct elevator_queue *e) > { > struct deadline_data *dd = e->elevator_data; > @@ -733,6 +772,15 @@ static int deadline_starved_show(void *data, struct seq_file *m) > return 0; > } > > +static int dd_async_depth_show(void *data, struct seq_file *m) > +{ > + struct request_queue *q = data; > + struct deadline_data *dd = q->elevator->elevator_data; > + > + seq_printf(m, "%u\n", dd->async_depth); > + return 0; > +} > + > static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos) > __acquires(&dd->lock) > { > @@ -775,6 +823,7 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { > DEADLINE_QUEUE_DDIR_ATTRS(write), > {"batching", 0400, deadline_batching_show}, > {"starved", 0400, deadline_starved_show}, > + {"async_depth", 0400, dd_async_depth_show}, > {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops}, > {}, > }; > @@ -783,6 +832,8 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { > > static struct elevator_type mq_deadline = { > .ops = { > + .depth_updated = dd_depth_updated, > + .limit_depth = dd_limit_depth, > .insert_requests = dd_insert_requests, > .dispatch_request = dd_dispatch_request, > .prepare_request = dd_prepare_request, > @@ -796,6 +847,7 @@ static struct elevator_type mq_deadline = { > .has_work = 
dd_has_work, > .init_sched = dd_init_sched, > .exit_sched = dd_exit_sched, > + .init_hctx = dd_init_hctx, > }, > > #ifdef CONFIG_BLK_DEBUG_FS > I can't say I like heuristics. They might be needed, but there will always be use cases where the heuristics fail. Can't we make this value (async_depth) configurable via sysfs? Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@xxxxxxx +49 911 74053 688 SUSE Software Solutions Germany GmbH, 90409 Nürnberg GF: F. Imendörffer, HRB 36809 (AG Nürnberg)