Send requeued requests to the I/O scheduler such that the I/O scheduler
can control the order in which requests are dispatched.

This patch reworks commit aef1897cd36d ("blk-mq: insert rq with DONTPREP
to hctx dispatch list when requeue").

Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Damien Le Moal <dlemoal@xxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Mike Snitzer <snitzer@xxxxxxxxxx>
Cc: Jianchao Wang <jianchao.w.wang@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-mq.c         | 36 +++++++++++++++++++++++-------------
 include/linux/blk-mq.h |  4 ++--
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e79cc34ad962..632aee9af60f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1438,30 +1438,26 @@ static void blk_mq_requeue_work(struct work_struct *work)
 		container_of(work, struct request_queue, requeue_work.work);
 	LIST_HEAD(requeue_list);
 	LIST_HEAD(flush_list);
-	struct request *rq;
+	struct request *rq, *next;
 
 	spin_lock_irq(&q->requeue_lock);
 	list_splice_init(&q->requeue_list, &requeue_list);
 	list_splice_init(&q->flush_list, &flush_list);
 	spin_unlock_irq(&q->requeue_lock);
 
-	while (!list_empty(&requeue_list)) {
-		rq = list_entry(requeue_list.next, struct request, queuelist);
-		/*
-		 * If RQF_DONTPREP ist set, the request has been started by the
-		 * driver already and might have driver-specific data allocated
-		 * already. Insert it into the hctx dispatch list to avoid
-		 * block layer merges for the request.
-		 */
-		if (rq->rq_flags & RQF_DONTPREP) {
-			list_del_init(&rq->queuelist);
-			blk_mq_request_bypass_insert(rq, 0);
-		} else {
+	list_for_each_entry_safe(rq, next, &requeue_list, queuelist) {
+		if (!(rq->rq_flags & RQF_DONTPREP)) {
 			list_del_init(&rq->queuelist);
 			blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
 		}
 	}
 
+	while (!list_empty(&requeue_list)) {
+		rq = list_entry(requeue_list.next, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_insert_request(rq, 0);
+	}
+
 	while (!list_empty(&flush_list)) {
 		rq = list_entry(flush_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
@@ -2064,14 +2060,28 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 		bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
 			((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
 			blk_mq_is_shared_tags(hctx->flags));
+		LIST_HEAD(for_sched);
+		struct request *next;
 
 		if (nr_budgets)
 			blk_mq_release_budgets(q, list);
 
 		spin_lock(&hctx->lock);
+		list_for_each_entry_safe(rq, next, list, queuelist)
+			if (rq->rq_flags & RQF_USE_SCHED)
+				list_move_tail(&rq->queuelist, &for_sched);
 		list_splice_tail_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
 
+		if (q->elevator) {
+			if (q->elevator->type->ops.requeue_request)
+				list_for_each_entry(rq, &for_sched, queuelist)
+					q->elevator->type->ops.
+						requeue_request(rq);
+			q->elevator->type->ops.insert_requests(hctx, &for_sched,
+							BLK_MQ_INSERT_AT_HEAD);
+		}
+
 		/*
 		 * Order adding requests to hctx->dispatch and checking
 		 * SCHED_RESTART flag. The pair of this smp_mb() is the one
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d778cb6b2112..363894aea0e8 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -62,8 +62,8 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_RESV		((__force req_flags_t)(1 << 23))
 
 /* flags that prevent us from merging requests: */
-#define RQF_NOMERGE_FLAGS \
-	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+#define RQF_NOMERGE_FLAGS \
+	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_DONTPREP | RQF_SPECIAL_PAYLOAD)
 
 enum mq_rq_state {
 	MQ_RQ_IDLE		= 0,