On 4/8/23 08:58, Bart Van Assche wrote:
> Prepare for processing the requeue list from inside __blk_mq_run_hw_queue().

With such a short comment, it is hard to see exactly what this patch is
trying to do. The first part seems to be adding debugfs stuff, which I think
is fine, but should be its own patch. The second part moves the requeue work
from per queue to per hctx, as I understand it. Why? Can you explain that
here?

>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
> Cc: Ming Lei <ming.lei@xxxxxxxxxx>
> Cc: Mike Snitzer <snitzer@xxxxxxxxxx>
> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  block/blk-mq-debugfs.c | 66 +++++++++++++++++++++---------------------
>  block/blk-mq.c         | 58 +++++++++++++++++++++++--------------
>  include/linux/blk-mq.h |  4 +++
>  include/linux/blkdev.h |  4 ---
>  4 files changed, 73 insertions(+), 59 deletions(-)
>
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 212a7f301e73..5eb930754347 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -20,37 +20,6 @@ static int queue_poll_stat_show(void *data, struct seq_file *m)
>          return 0;
>  }
>
> -static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
> -        __acquires(&q->requeue_lock)
> -{
> -        struct request_queue *q = m->private;
> -
> -        spin_lock_irq(&q->requeue_lock);
> -        return seq_list_start(&q->requeue_list, *pos);
> -}
> -
> -static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
> -{
> -        struct request_queue *q = m->private;
> -
> -        return seq_list_next(v, &q->requeue_list, pos);
> -}
> -
> -static void queue_requeue_list_stop(struct seq_file *m, void *v)
> -        __releases(&q->requeue_lock)
> -{
> -        struct request_queue *q = m->private;
> -
> -        spin_unlock_irq(&q->requeue_lock);
> -}
> -
> -static const struct seq_operations queue_requeue_list_seq_ops = {
> -        .start = queue_requeue_list_start,
> -        .next = queue_requeue_list_next,
> -        .stop = queue_requeue_list_stop,
> -        .show = blk_mq_debugfs_rq_show,
> -};
> -
>  static int blk_flags_show(struct seq_file *m, const unsigned long flags,
>                  const char *const *flag_name, int flag_name_count)
>  {
> @@ -156,11 +125,10 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
>
>  static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
>          { "poll_stat", 0400, queue_poll_stat_show },
> -        { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
>          { "pm_only", 0600, queue_pm_only_show, NULL },
>          { "state", 0600, queue_state_show, queue_state_write },
>          { "zone_wlock", 0400, queue_zone_wlock_show, NULL },
> -        { },
> +        {},
>  };
>
>  #define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name
> @@ -513,6 +481,37 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
>          return 0;
>  }
>
> +static void *hctx_requeue_list_start(struct seq_file *m, loff_t *pos)
> +        __acquires(&hctx->requeue_lock)
> +{
> +        struct blk_mq_hw_ctx *hctx = m->private;
> +
> +        spin_lock_irq(&hctx->requeue_lock);
> +        return seq_list_start(&hctx->requeue_list, *pos);
> +}
> +
> +static void *hctx_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +        struct blk_mq_hw_ctx *hctx = m->private;
> +
> +        return seq_list_next(v, &hctx->requeue_list, pos);
> +}
> +
> +static void hctx_requeue_list_stop(struct seq_file *m, void *v)
> +        __releases(&hctx->requeue_lock)
> +{
> +        struct blk_mq_hw_ctx *hctx = m->private;
> +
> +        spin_unlock_irq(&hctx->requeue_lock);
> +}
> +
> +static const struct seq_operations hctx_requeue_list_seq_ops = {
> +        .start = hctx_requeue_list_start,
> +        .next = hctx_requeue_list_next,
> +        .stop = hctx_requeue_list_stop,
> +        .show = blk_mq_debugfs_rq_show,
> +};
> +
>  #define CTX_RQ_SEQ_OPS(name, type) \
>  static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
>          __acquires(&ctx->lock) \
> @@ -628,6 +627,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
>          {"run", 0600, hctx_run_show, hctx_run_write},
>          {"active", 0400, hctx_active_show},
>          {"dispatch_busy", 0400, hctx_dispatch_busy_show},
> +        {"requeue_list", 0400, .seq_ops = &hctx_requeue_list_seq_ops},
>          {"type", 0400, hctx_type_show},
>          {},
>  };
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 77fdaed4e074..deb3d08a6b26 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1411,14 +1411,17 @@ EXPORT_SYMBOL(blk_mq_requeue_request);
>
>  static void blk_mq_requeue_work(struct work_struct *work)
>  {
> -        struct request_queue *q =
> -                container_of(work, struct request_queue, requeue_work.work);
> +        struct blk_mq_hw_ctx *hctx =
> +                container_of(work, struct blk_mq_hw_ctx, requeue_work.work);
>          LIST_HEAD(rq_list);
>          struct request *rq, *next;
>
> -        spin_lock_irq(&q->requeue_lock);
> -        list_splice_init(&q->requeue_list, &rq_list);
> -        spin_unlock_irq(&q->requeue_lock);
> +        if (list_empty_careful(&hctx->requeue_list))
> +                return;
> +
> +        spin_lock_irq(&hctx->requeue_lock);
> +        list_splice_init(&hctx->requeue_list, &rq_list);
> +        spin_unlock_irq(&hctx->requeue_lock);
>
>          list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
>                  if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
> @@ -1435,13 +1438,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
>                  blk_mq_sched_insert_request(rq, false, false, false);
>          }
>
> -        blk_mq_run_hw_queues(q, false);
> +        blk_mq_run_hw_queue(hctx, false);
>  }
>
>  void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
>                  bool kick_requeue_list)
>  {
> -        struct request_queue *q = rq->q;
> +        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
>          unsigned long flags;
>
>          /*
> @@ -1449,31 +1452,42 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
>           * request head insertion from the workqueue.
>           */
>          BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
> +        WARN_ON_ONCE(!rq->mq_hctx);
>
> -        spin_lock_irqsave(&q->requeue_lock, flags);
> +        spin_lock_irqsave(&hctx->requeue_lock, flags);
>          if (at_head) {
>                  rq->rq_flags |= RQF_SOFTBARRIER;
> -                list_add(&rq->queuelist, &q->requeue_list);
> +                list_add(&rq->queuelist, &hctx->requeue_list);
>          } else {
> -                list_add_tail(&rq->queuelist, &q->requeue_list);
> +                list_add_tail(&rq->queuelist, &hctx->requeue_list);
>          }
> -        spin_unlock_irqrestore(&q->requeue_lock, flags);
> +        spin_unlock_irqrestore(&hctx->requeue_lock, flags);
>
>          if (kick_requeue_list)
> -                blk_mq_kick_requeue_list(q);
> +                blk_mq_kick_requeue_list(rq->q);
>  }
>
>  void blk_mq_kick_requeue_list(struct request_queue *q)
>  {
> -        kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
> +        struct blk_mq_hw_ctx *hctx;
> +        unsigned long i;
> +
> +        queue_for_each_hw_ctx(q, hctx, i)
> +                kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND,
> +                        &hctx->requeue_work, 0);
>  }
>  EXPORT_SYMBOL(blk_mq_kick_requeue_list);
>
>  void blk_mq_delay_kick_requeue_list(struct request_queue *q,
>                  unsigned long msecs)
>  {
> -        kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
> -                msecs_to_jiffies(msecs));
> +        struct blk_mq_hw_ctx *hctx;
> +        unsigned long i;
> +
> +        queue_for_each_hw_ctx(q, hctx, i)
> +                kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND,
> +                        &hctx->requeue_work,
> +                        msecs_to_jiffies(msecs));
>  }
>  EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
>
> @@ -3594,6 +3608,10 @@ static int blk_mq_init_hctx(struct request_queue *q,
>                  struct blk_mq_tag_set *set,
>                  struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
>  {
> +        INIT_DELAYED_WORK(&hctx->requeue_work, blk_mq_requeue_work);
> +        INIT_LIST_HEAD(&hctx->requeue_list);
> +        spin_lock_init(&hctx->requeue_lock);
> +
>          hctx->queue_num = hctx_idx;
>
>          if (!(hctx->flags & BLK_MQ_F_STACKING))
> @@ -4209,10 +4227,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
>          q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
>          blk_mq_update_poll_flag(q);
>
> -        INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
> -        INIT_LIST_HEAD(&q->requeue_list);
> -        spin_lock_init(&q->requeue_lock);
> -
>          q->nr_requests = set->queue_depth;
>
>          blk_mq_init_cpu_queues(q, set->nr_hw_queues);
> @@ -4757,10 +4771,10 @@ void blk_mq_cancel_work_sync(struct request_queue *q)
>          struct blk_mq_hw_ctx *hctx;
>          unsigned long i;
>
> -        cancel_delayed_work_sync(&q->requeue_work);
> -
> -        queue_for_each_hw_ctx(q, hctx, i)
> +        queue_for_each_hw_ctx(q, hctx, i) {
> +                cancel_delayed_work_sync(&hctx->requeue_work);
>                  cancel_delayed_work_sync(&hctx->run_work);
> +        }
>  }
>
>  static int __init blk_mq_init(void)
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 3a3bee9085e3..0157f1569980 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -311,6 +311,10 @@ struct blk_mq_hw_ctx {
>                  unsigned long state;
>          } ____cacheline_aligned_in_smp;
>
> +        struct list_head requeue_list;
> +        spinlock_t requeue_lock;
> +        struct delayed_work requeue_work;
> +
>          /**
>           * @run_work: Used for scheduling a hardware queue run at a later time.
>           */
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index e3242e67a8e3..f5fa53cd13bd 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -491,10 +491,6 @@ struct request_queue {
>           */
>          struct blk_flush_queue *fq;
>
> -        struct list_head requeue_list;
> -        spinlock_t requeue_lock;
> -        struct delayed_work requeue_work;
> -
>          struct mutex sysfs_lock;
>          struct mutex sysfs_dir_lock;
>
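To make the structural part of the question above concrete, here is a minimal
sketch of what moves where. It is not taken verbatim from the patch: only the
three field names come from the quoted hunks, the typedefs are stand-ins so
the fragment builds outside the kernel, and every other member of both
structures is omitted.

/* Stand-in types for illustration only; the real tree uses the kernel's own
 * list_head, spinlock_t and delayed_work definitions.
 */
struct list_head { struct list_head *next, *prev; };
typedef struct { int dummy; } spinlock_t;
struct delayed_work { void (*func)(struct delayed_work *work); };

/* Before the patch: one requeue list per request queue (include/linux/blkdev.h). */
struct request_queue {
        struct list_head requeue_list;    /* protected by requeue_lock */
        spinlock_t requeue_lock;
        struct delayed_work requeue_work; /* executes blk_mq_requeue_work() */
        /* ... */
};

/* After the patch: the same three fields live in each hardware context
 * (include/linux/blk-mq.h). blk_mq_requeue_work() then splices and re-runs
 * only its own hctx, and blk_mq_kick_requeue_list() has to iterate over all
 * hctxs to schedule the per-hctx work items.
 */
struct blk_mq_hw_ctx {
        struct list_head requeue_list;
        spinlock_t requeue_lock;
        struct delayed_work requeue_work;
        /* ... */
};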