On Wed, Jan 07 2015 at 11:24am -0500,
Jens Axboe <axboe@xxxxxxxxx> wrote:

> On 01/07/2015 09:22 AM, Mike Snitzer wrote:
> >On Wed, Jan 07 2015 at 11:15am -0500,
> >Mike Snitzer <snitzer@xxxxxxxxxx> wrote:
> >
> >>On Wed, Jan 07 2015 at 10:32am -0500,
> >>Jens Axboe <axboe@xxxxxxxxx> wrote:
> >>
> >>>You forgot dm-1, that's what mkfs is sleeping on. But given that
> >>>sdc/sdd look idle, it still looks like a case of dm-1 not
> >>>appropriately running the queues after insertion.
> >>
> >>DM never directly runs the queues of the underlying SCSI devices
> >>(e.g. sdc, sdd).
> >>
> >>request-based DM runs the DM device's queue on completion of a
> >>clone request:
> >>
> >>dm_end_request -> rq_completed -> blk_run_queue_async
> >>
> >>Which ultimately does seem to be the wrong way around (like you
> >>say: queue should run after insertion).
> >
> >Hmm, for q->mq_ops blk_insert_cloned_request() should already be
> >running the queue.
> >
> >blk_insert_cloned_request is calling
> >blk_mq_insert_request(rq, false, true, true);
> >
> >Third arg being @run_queue, which results in blk_mq_run_hw_queue()
> >being called.
>
> OK, that should be fine then. In that case, it's probably a missing
> queue run in some other condition... Which does make more sense,
> since "most" of the runs Bart did looked fine, it's just a slow one
> every now and then.

The one case that looks suspect (missing queue run) is if/when the
multipath target's mapping function returns DM_MAPIO_REQUEUE.  It only
returns this if memory allocation fails (e.g. blk_get_request returns
ENOMEM).  I'm not seeing why DM core wouldn't want to re-run the DM
device's queue in this case, given it just called blk_requeue_request()
(the requeue path is sketched after the patch below).  But that seems
like something I need to revisit and not the ultimate problem.

Looking closer: why not have blk_run_queue() (and blk_run_queue_async)
call blk_mq_start_stopped_hw_queues() if q->mq_ops?  As is,
scsi_run_queue() open-codes that check.

Actually, that is likely the ultimate problem: blk_run_queue* aren't
trained for q->mq_ops.  As such, DM would need to open-code a call to
blk_mq_start_stopped_hw_queues().
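For illustration, an mq-aware blk_run_queue_async() might look
something like the following.  This is a hypothetical sketch, not what
the 3.19-era block core actually does; the legacy (!q->mq_ops) body is
paraphrased from memory of blk-core.c, so treat the details as
approximate:

/*
 * Hypothetical: blk_run_queue_async() taught about q->mq_ops, along
 * the lines of how scsi_run_queue() open-codes the same check.
 */
void blk_run_queue_async(struct request_queue *q)
{
	if (q->mq_ops) {
		/* restart any stopped hw queues and kick dispatch */
		blk_mq_start_stopped_hw_queues(q, true);
		return;
	}

	/* legacy request_fn path: punt the queue run to kblockd */
	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}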
I think DM needs something like the following _untested_ patch; pretty
glaring oversight on my part (I thought the appropriate old block
methods would do the right thing for q->mq_ops):

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 549b815..d70a665 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -14,6 +14,7 @@
 #include <linux/moduleparam.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
+#include <linux/blk-mq.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
@@ -1012,6 +1013,46 @@ static void end_clone_bio(struct bio *clone, int error)
 }
 
 /*
+ * request-based DM queue management functions.
+ */
+static void dm_stop_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	if (q->mq_ops) {
+		blk_mq_stop_hw_queues(q);
+		return;
+	}
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	blk_stop_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_start_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	if (q->mq_ops) {
+		blk_mq_start_stopped_hw_queues(q, true);
+		return;
+	}
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (blk_queue_stopped(q))
+		blk_start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_run_queue(struct request_queue *q)
+{
+	if (q->mq_ops)
+		blk_mq_start_stopped_hw_queues(q, true);
+	else
+		blk_run_queue_async(q);
+}
+
+/*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
  * Or do dm_get() before calling this function and dm_put() later.
@@ -1031,7 +1072,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	 * queue lock again.
 	 */
 	if (run_queue)
-		blk_run_queue_async(md->queue);
+		dm_run_queue(md->queue);
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
@@ -1119,35 +1160,6 @@ static void dm_requeue_unmapped_request(struct request *clone)
 	dm_requeue_unmapped_original_request(tio->md, tio->orig);
 }
 
-static void __stop_queue(struct request_queue *q)
-{
-	blk_stop_queue(q);
-}
-
-static void stop_queue(struct request_queue *q)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	__stop_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void __start_queue(struct request_queue *q)
-{
-	if (blk_queue_stopped(q))
-		blk_start_queue(q);
-}
-
-static void start_queue(struct request_queue *q)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	__start_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
 static void dm_done(struct request *clone, int error, bool mapped)
 {
 	int r = error;
@@ -2419,7 +2431,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	 * because request-based dm may be run just after the setting.
 	 */
 	if (dm_table_request_based(t) && !blk_queue_stopped(q))
-		stop_queue(q);
+		dm_stop_queue(q);
 
 	__bind_mempools(md, t);
 
@@ -2886,7 +2898,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 * dm defers requests to md->wq from md->queue.
 	 */
 	if (dm_request_based(md)) {
-		stop_queue(md->queue);
+		dm_stop_queue(md->queue);
 		flush_kthread_worker(&md->kworker);
 	}
 
@@ -2909,7 +2921,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	dm_queue_flush(md);
 
 	if (dm_request_based(md))
-		start_queue(md->queue);
+		dm_start_queue(md->queue);
 
 	unlock_fs(md);
 	dm_table_presuspend_undo_targets(map);
@@ -2988,7 +3000,7 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map)
 	 * Request-based dm is queueing the deferred I/Os in its request_queue.
 	 */
 	if (dm_request_based(md))
-		start_queue(md->queue);
+		dm_start_queue(md->queue);
 
 	unlock_fs(md);
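For reference, the suspect DM_MAPIO_REQUEUE requeue path mentioned
above looks roughly like this (paraphrased from the same dm.c, not
verbatim, so treat the details as approximate).  Note that
rq_completed() is passed run_queue=false even though the original
request was just put back on the DM device's queue:

static void dm_requeue_unmapped_original_request(struct mapped_device *md,
						 struct request *rq)
{
	int rw = rq_data_dir(rq);
	struct request_queue *q = rq->q;
	unsigned long flags;

	dm_unprep_request(rq);

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	spin_unlock_irqrestore(q->queue_lock, flags);

	/* run_queue == false: the DM queue is not re-run after requeue */
	rq_completed(md, rw, false);
}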