Re: [PATCH v16 14/26] blk-mq: Restore the zoned write order when requeuing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/19/24 09:28, Bart Van Assche wrote:
> Zoned writes may be requeued, e.g. if a block driver returns
> BLK_STS_RESOURCE. Requests may be requeued in a different order than
> the one in which they were submitted. Restore the submission order
> when requests are requeued.
> 
> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  block/bfq-iosched.c    |  2 ++
>  block/blk-mq.c         | 20 +++++++++++++++++++-
>  block/blk-mq.h         |  2 ++
>  block/kyber-iosched.c  |  2 ++
>  block/mq-deadline.c    |  7 ++++++-
>  include/linux/blk-mq.h |  2 +-
>  6 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
> index 0747d9d0e48c..13bedbf03bd2 100644
> --- a/block/bfq-iosched.c
> +++ b/block/bfq-iosched.c
> @@ -6265,6 +6265,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  
>  	if (flags & BLK_MQ_INSERT_AT_HEAD) {
>  		list_add(&rq->queuelist, &bfqd->dispatch);
> +	} else if (flags & BLK_MQ_INSERT_ORDERED) {
> +		blk_mq_insert_ordered(rq, &bfqd->dispatch);
>  	} else if (!bfqq) {
>  		list_add_tail(&rq->queuelist, &bfqd->dispatch);
>  	} else {
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index f134d5e1c4a1..1302ccbf2a7d 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1564,7 +1564,9 @@ static void blk_mq_requeue_work(struct work_struct *work)
>  		 * already.  Insert it into the hctx dispatch list to avoid
>  		 * block layer merges for the request.
>  		 */
> -		if (rq->rq_flags & RQF_DONTPREP)
> +		if (blk_rq_is_seq_zoned_write(rq))
> +			blk_mq_insert_request(rq, BLK_MQ_INSERT_ORDERED);

Is this OK to do without any starvation prevention? A high-LBA write that
constantly gets requeued behind low-LBA writes could end up in a timeout
situation, no?

> +		else if (rq->rq_flags & RQF_DONTPREP)
>  			blk_mq_request_bypass_insert(rq, 0);
>  		else
>  			blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
> @@ -2599,6 +2601,20 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
>  	blk_mq_run_hw_queue(hctx, run_queue_async);
>  }
>  
> +void blk_mq_insert_ordered(struct request *rq, struct list_head *list)
> +{
> +	struct request_queue *q = rq->q;
> +	struct request *rq2;
> +
> +	list_for_each_entry(rq2, list, queuelist)
> +		if (rq2->q == q && blk_rq_pos(rq2) > blk_rq_pos(rq))
> +			break;
> +
> +	/* Insert rq before rq2. If rq2 is the list head, append at the end. */
> +	list_add_tail(&rq->queuelist, &rq2->queuelist);
> +}
> +EXPORT_SYMBOL_GPL(blk_mq_insert_ordered);
> +
>  static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
>  {
>  	struct request_queue *q = rq->q;
> @@ -2653,6 +2669,8 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
>  		spin_lock(&ctx->lock);
>  		if (flags & BLK_MQ_INSERT_AT_HEAD)
>  			list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
> +		else if (flags & BLK_MQ_INSERT_ORDERED)
> +			blk_mq_insert_ordered(rq, &ctx->rq_lists[hctx->type]);
>  		else
>  			list_add_tail(&rq->queuelist,
>  				      &ctx->rq_lists[hctx->type]);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 309db553aba6..10b9fb3ca762 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -40,8 +40,10 @@ enum {
>  
>  typedef unsigned int __bitwise blk_insert_t;
>  #define BLK_MQ_INSERT_AT_HEAD		((__force blk_insert_t)0x01)
> +#define BLK_MQ_INSERT_ORDERED		((__force blk_insert_t)0x02)
>  
>  void blk_mq_submit_bio(struct bio *bio);
> +void blk_mq_insert_ordered(struct request *rq, struct list_head *list);
>  int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
>  		unsigned int flags);
>  void blk_mq_exit_queue(struct request_queue *q);
> diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
> index 4155594aefc6..77bb41bab68d 100644
> --- a/block/kyber-iosched.c
> +++ b/block/kyber-iosched.c
> @@ -603,6 +603,8 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
>  		trace_block_rq_insert(rq);
>  		if (flags & BLK_MQ_INSERT_AT_HEAD)
>  			list_move(&rq->queuelist, head);
> +		else if (flags & BLK_MQ_INSERT_ORDERED)
> +			blk_mq_insert_ordered(rq, head);
>  		else
>  			list_move_tail(&rq->queuelist, head);
>  		sbitmap_set_bit(&khd->kcq_map[sched_domain],
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 2edf84b1bc2a..200e5a2928ce 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -711,7 +711,12 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  		 * set expire time and add to fifo list
>  		 */
>  		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
> -		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
> +		if (flags & BLK_MQ_INSERT_ORDERED)
> +			blk_mq_insert_ordered(rq,
> +					      &per_prio->fifo_list[data_dir]);
> +		else
> +			list_add_tail(&rq->queuelist,
> +				      &per_prio->fifo_list[data_dir]);
>  	}
>  }
>  
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index ac05974f08f9..f7514eefccfd 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -85,7 +85,7 @@ enum {
>  
>  /* flags that prevent us from merging requests: */
>  #define RQF_NOMERGE_FLAGS \
> -	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
> +	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_DONTPREP | RQF_SPECIAL_PAYLOAD)
>  
>  enum mq_rq_state {
>  	MQ_RQ_IDLE		= 0,


-- 
Damien Le Moal
Western Digital Research




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux