On 4/10/23 00:46, Damien Le Moal wrote:
> Given that this change has nothing specific to zoned devices, you could
> rewrite the commit message to mention that. And are bfq and kyber OK with
> this change as well? Also, to be consistent with this change, shouldn't
> blk_mq_sched_bypass_insert() be updated as well? That function is called
> from blk_mq_sched_insert_request().
Hi Damien,

I'm considering replacing patch 02/12 of this series with the patch below:


Subject: [PATCH] block: Send flush requests to the I/O scheduler

Send flush requests to the I/O scheduler such that I/O scheduler policies
are applied to writes with the FUA flag set. Separate the I/O scheduler
members from the flush members in struct request since, with this patch
applied, a request may pass through both an I/O scheduler and the flush
machinery. This change affects the statistics of I/O schedulers that track
I/O statistics (BFQ and mq-deadline).

Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-flush.c      |  8 ++++++--
 block/blk-mq-sched.c   | 19 +++++++++++++++----
 block/blk-mq-sched.h   |  1 +
 block/blk-mq.c         | 22 +++++----------------
 include/linux/blk-mq.h | 27 +++++++++++----------------
 5 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 53202eff545e..cf0afb75fafd 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -336,8 +336,11 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 		 * account of this driver tag
 		 */
 		flush_rq->rq_flags |= RQF_MQ_INFLIGHT;
-	} else
+	} else {
 		flush_rq->internal_tag = first_rq->internal_tag;
+		flush_rq->rq_flags |= RQF_ELV;
+		blk_mq_sched_prepare(flush_rq);
+	}
 
 	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
 	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
@@ -432,7 +435,8 @@ void blk_insert_flush(struct request *rq)
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		blk_mq_request_bypass_insert(rq, false, true);
+		blk_mq_sched_insert_request(rq, /*at_head=*/false,
+					    /*run_queue=*/true, /*async=*/true);
 		return;
 	}
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 62c8c1ba1321..9938c35aa7ed 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -18,6 +18,20 @@
 #include "blk-mq-tag.h"
 #include "blk-wbt.h"
 
+/* Prepare a request for insertion into an I/O scheduler. */
+void blk_mq_sched_prepare(struct request *rq)
+{
+	struct elevator_queue *e = rq->q->elevator;
+
+	INIT_HLIST_NODE(&rq->hash);
+	RB_CLEAR_NODE(&rq->rb_node);
+
+	if (e->type->ops.prepare_request) {
+		e->type->ops.prepare_request(rq);
+		rq->rq_flags |= RQF_ELVPRIV;
+	}
+}
+
 /*
  * Mark a hardware queue as needing a restart.
  */
@@ -397,10 +411,7 @@ static bool blk_mq_sched_bypass_insert(struct request *rq)
 	 * passthrough request is added to scheduler queue, there isn't any
 	 * chance to dispatch it given we prioritize requests in hctx->dispatch.
 	 */
-	if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
-		return true;
-
-	return false;
+	return req_op(rq) == REQ_OP_FLUSH || blk_rq_is_passthrough(rq);
 }
 
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 025013972453..6337c5a66af6 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -8,6 +8,7 @@
 
 #define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
 
+void blk_mq_sched_prepare(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs, struct request **merged_request);
 bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index db93b1a71157..2731466b1952 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,18 +384,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	WRITE_ONCE(rq->deadline, 0);
 	req_ref_set(rq, 1);
 
-	if (rq->rq_flags & RQF_ELV) {
-		struct elevator_queue *e = data->q->elevator;
-
-		INIT_HLIST_NODE(&rq->hash);
-		RB_CLEAR_NODE(&rq->rb_node);
-
-		if (!op_is_flush(data->cmd_flags) &&
-		    e->type->ops.prepare_request) {
-			e->type->ops.prepare_request(rq);
-			rq->rq_flags |= RQF_ELVPRIV;
-		}
-	}
+	if (rq->rq_flags & RQF_ELV)
+		blk_mq_sched_prepare(rq);
 
 	return rq;
 }
@@ -452,13 +442,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 		data->rq_flags |= RQF_ELV;
 
 		/*
-		 * Flush/passthrough requests are special and go directly to the
-		 * dispatch list. Don't include reserved tags in the
-		 * limiting, as it isn't useful.
+		 * Do not limit the depth for passthrough requests nor for
+		 * requests with a reserved tag.
 		 */
-		if (!op_is_flush(data->cmd_flags) &&
+		if (e->type->ops.limit_depth &&
 		    !blk_op_is_passthrough(data->cmd_flags) &&
-		    e->type->ops.limit_depth &&
 		    !(data->flags & BLK_MQ_REQ_RESERVED))
 			e->type->ops.limit_depth(data->cmd_flags, data);
 	}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 06caacd77ed6..5e6c79ad83d2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -169,25 +169,20 @@ struct request {
 		void *completion_data;
 	};
 
-
 	/*
 	 * Three pointers are available for the IO schedulers, if they need
-	 * more they have to dynamically allocate it. Flush requests are
-	 * never put on the IO scheduler. So let the flush fields share
-	 * space with the elevator data.
+	 * more they have to dynamically allocate it.
 	 */
-	union {
-		struct {
-			struct io_cq		*icq;
-			void			*priv[2];
-		} elv;
-
-		struct {
-			unsigned int		seq;
-			struct list_head	list;
-			rq_end_io_fn		*saved_end_io;
-		} flush;
-	};
+	struct {
+		struct io_cq		*icq;
+		void			*priv[2];
+	} elv;
+
+	struct {
+		unsigned int		seq;
+		struct list_head	list;
+		rq_end_io_fn		*saved_end_io;
+	} flush;
 
 	union {
 		struct __call_single_data csd;
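
As a side note for anyone who wants to exercise this path: one way to generate
a write with REQ_FUA set from user space is an O_DIRECT + O_DSYNC write to a
block device, since the bdev direct-I/O path submits IOCB_DSYNC writes with
REQ_FUA on current kernels. Below is a minimal, hypothetical test sketch (not
part of the patch above; the /dev/nullb0 path and the 4 KiB size are
placeholders). Whether such a write then goes through the flush machinery still
depends on the queue's write cache and FUA settings (see blk_flush_policy()).

/*
 * Hypothetical test program (not part of the patch): generate a FUA-flagged
 * write by combining O_DIRECT and O_DSYNC on a block device. The default
 * device path (null_blk test device) and the 4 KiB I/O size are placeholders.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "/dev/nullb0";
	const size_t len = 4096;	/* must be a multiple of the logical block size */
	void *buf;
	int fd;

	fd = open(dev, O_WRONLY | O_DIRECT | O_DSYNC);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* O_DIRECT requires an aligned buffer. */
	if (posix_memalign(&buf, len, len)) {
		perror("posix_memalign");
		return 1;
	}
	memset(buf, 0x5a, len);
	/* With O_DSYNC, this direct write to the block device is submitted with REQ_FUA. */
	if (pwrite(fd, buf, len, 0) != (ssize_t)len)
		perror("pwrite");
	free(buf);
	close(fd);
	return 0;
}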