On 4/8/23 08:58, Bart Van Assche wrote:
> Prevent that zoned writes with the FUA flag set are reordered against each
> other or against other zoned writes. Separate the I/O scheduler members
> from the flush members in struct request since with this patch applied a
> request may pass through both an I/O scheduler and the flush machinery.
>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
> Cc: Ming Lei <ming.lei@xxxxxxxxxx>
> Cc: Mike Snitzer <snitzer@xxxxxxxxxx>
> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  block/blk-flush.c      |  3 ++-
>  block/blk-mq.c         | 11 ++++-------
>  block/mq-deadline.c    |  2 +-
>  include/linux/blk-mq.h | 27 +++++++++++----------------
>  4 files changed, 18 insertions(+), 25 deletions(-)
>
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index 53202eff545e..e0cf153388d8 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -432,7 +432,8 @@ void blk_insert_flush(struct request *rq)
>  	 */
>  	if ((policy & REQ_FSEQ_DATA) &&
>  	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
> -		blk_mq_request_bypass_insert(rq, false, true);
> +		blk_mq_sched_insert_request(rq, /*at_head=*/false,
> +					    /*run_queue=*/true, /*async=*/true);
>  		return;
>  	}
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index fefc9a728e0e..250556546bbf 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -390,8 +390,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>  		INIT_HLIST_NODE(&rq->hash);
>  		RB_CLEAR_NODE(&rq->rb_node);
>
> -		if (!op_is_flush(data->cmd_flags) &&
> -		    e->type->ops.prepare_request) {
> +		if (e->type->ops.prepare_request) {
>  			e->type->ops.prepare_request(rq);
>  			rq->rq_flags |= RQF_ELVPRIV;
>  		}
> @@ -452,13 +451,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>  		data->rq_flags |= RQF_ELV;
>
>  		/*
> -		 * Flush/passthrough requests are special and go directly to the
> -		 * dispatch list. Don't include reserved tags in the
> -		 * limiting, as it isn't useful.
> +		 * Do not limit the depth for passthrough requests nor for
> +		 * requests with a reserved tag.
>  		 */
> -		if (!op_is_flush(data->cmd_flags) &&
> +		if (e->type->ops.limit_depth &&
>  		    !blk_op_is_passthrough(data->cmd_flags) &&
> -		    e->type->ops.limit_depth &&
>  		    !(data->flags & BLK_MQ_REQ_RESERVED))
>  			e->type->ops.limit_depth(data->cmd_flags, data);
>  	}
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index f10c2a0d18d4..d885ccf49170 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -789,7 +789,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>
>  	prio = ioprio_class_to_prio[ioprio_class];
>  	per_prio = &dd->per_prio[prio];
> -	if (!rq->elv.priv[0]) {
> +	if (!rq->elv.priv[0] && !(rq->rq_flags & RQF_FLUSH_SEQ)) {
>  		per_prio->stats.inserted++;
>  		rq->elv.priv[0] = (void *)(uintptr_t)1;
>  	}

Given that this change has nothing specific to zoned devices, you could
rewrite the commit message to say so.

And are bfq and kyber OK with this change as well?

Also, to be consistent with this change, shouldn't
blk_mq_sched_bypass_insert() be updated as well? That function is called
from blk_mq_sched_insert_request().
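For reference, here is a rough sketch of the check I have in mind
(paraphrased from memory, not copied verbatim from the tree), which still
sends every RQF_FLUSH_SEQ request straight to hctx->dispatch:

static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	/*
	 * Flush-sequence and passthrough requests currently bypass the I/O
	 * scheduler and are inserted into hctx->dispatch directly. If flush
	 * requests may now go through the scheduler, the RQF_FLUSH_SEQ test
	 * below presumably needs to be dropped as well.
	 */
	return (rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq);
}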
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 06caacd77ed6..5e6c79ad83d2 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -169,25 +169,20 @@ struct request {
>  		void *completion_data;
>  	};
>
> -
>  	/*
>  	 * Three pointers are available for the IO schedulers, if they need
> -	 * more they have to dynamically allocate it. Flush requests are
> -	 * never put on the IO scheduler. So let the flush fields share
> -	 * space with the elevator data.
> +	 * more they have to dynamically allocate it.
>  	 */
> -	union {
> -		struct {
> -			struct io_cq *icq;
> -			void *priv[2];
> -		} elv;
> -
> -		struct {
> -			unsigned int seq;
> -			struct list_head list;
> -			rq_end_io_fn *saved_end_io;
> -		} flush;
> -	};
> +	struct {
> +		struct io_cq *icq;
> +		void *priv[2];
> +	} elv;
> +
> +	struct {
> +		unsigned int seq;
> +		struct list_head list;
> +		rq_end_io_fn *saved_end_io;
> +	} flush;
>
>  	union {
>  		struct __call_single_data csd;