This function is introduced for dequeuing request from sw queue so that we can dispatch it in scheduler's way. More importantly, some SCSI devices may set q->queue_depth, which is a per-request_queue limit, and applied on pending I/O from all hctxs. This function is introduced for avoiding to dequeue too many requests from sw queue when ->dispatch isn't flushed completely. Tested-by: Oleksandr Natalenko <oleksandr@xxxxxxxxxxxxxx> Tested-by: Tom Nguyen <tom81094@xxxxxxxxx> Tested-by: Paolo Valente <paolo.valente@xxxxxxxxxx> Reviewed-by: Bart Van Assche <bart.vanassche@xxxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq.c | 38 ++++++++++++++++++++++++++++++++++++++ block/blk-mq.h | 2 ++ 2 files changed, 40 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index d1b9fb539eba..8b49af1ade7f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -882,6 +882,44 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) } EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); +struct dispatch_rq_data { + struct blk_mq_hw_ctx *hctx; + struct request *rq; +}; + +static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) +{ + struct dispatch_rq_data *dispatch_data = data; + struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; + struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; + + spin_lock(&ctx->lock); + if (unlikely(!list_empty(&ctx->rq_list))) { + dispatch_data->rq = list_entry_rq(ctx->rq_list.next); + list_del_init(&dispatch_data->rq->queuelist); + if (list_empty(&ctx->rq_list)) + sbitmap_clear_bit(sb, bitnr); + } + spin_unlock(&ctx->lock); + + return !dispatch_data->rq; +} + +struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *start) +{ + unsigned off = start ? start->index_hw : 0; + struct dispatch_rq_data data = { + .hctx = hctx, + .rq = NULL, + }; + + __sbitmap_for_each_set(&hctx->ctx_map, off, + dispatch_rq_from_ctx, &data); + + return data.rq; +} + static inline unsigned int queued_to_index(unsigned int queued) { if (!queued) diff --git a/block/blk-mq.h b/block/blk-mq.h index 61aecf398a4b..915de58572e7 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -35,6 +35,8 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, bool wait); +struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *start); /* * Internal helpers for allocating/freeing the request map -- 2.9.5