When blk_get_request() fails to allocate a request, there is no way for the underlying queue's RESTART to restart DM's queue. This patch applies the newly introduced blk_mq_alloc_request_notify() to deal with this issue. The following issues can be addressed: 1) When blk_get_request() fails, dm-rq finally returns BLK_STS_RESOURCE to blk-mq, and blk-mq then runs the queue after 10ms; this delay may degrade dm-rq performance a lot when any underlying device attached to the same host is accessed directly. 2) There is no way to figure out a perfect delay for running the queue in situation 1), and if the delay is too small, requests can be dequeued from the DM queue too quickly, which hurts sequential IO performance. This approach was suggested by Jens, and has been used in blk-mq dispatch patch for a while, see blk_mq_mark_tag_wait(). Suggested-by: Jens Axboe <axboe@xxxxxxxxx> Cc: Mike Snitzer <snitzer@xxxxxxxxxx> Cc: Laurence Oberman <loberman@xxxxxxxxxx> Cc: Bart Van Assche <bart.vanassche@xxxxxxxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- drivers/md/dm-mpath.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- drivers/md/dm.c | 1 + 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7d3e572072f5..f7753a2e9229 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -11,6 +11,7 @@ #include "dm-bio-record.h" #include "dm-path-selector.h" #include "dm-uevent.h" +#include "dm.h" #include <linux/blkdev.h> #include <linux/ctype.h> @@ -30,6 +31,15 @@ #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) +/* + * When running out of requests from the underlying queue, use this + * structure to get notified by blk-mq + */ +struct underlying_notifier { + struct request_queue *q; + wait_queue_entry_t wait; +}; + /* Path properties */ struct pgpath { struct list_head list; @@ -40,6 +50,7 @@ struct pgpath { struct dm_path path; struct delayed_work 
activate_path; + struct underlying_notifier notifier; bool is_active:1; /* Path status */ }; @@ -125,6 +136,17 @@ static void process_queued_bios(struct work_struct *work); * Allocation routines *-----------------------------------------------*/ +static int req_notify(wait_queue_entry_t *wait, unsigned mode, int flags, + void *key) +{ + struct underlying_notifier *notifier; + + notifier = container_of(wait, struct underlying_notifier, wait); + list_del_init(&notifier->wait.entry); + blk_mq_run_hw_queues(notifier->q, true); + return 1; +} + static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); @@ -490,6 +512,27 @@ static bool must_push_back_bio(struct multipath *m) return __must_push_back(m, flags); } +static struct request * +multipath_get_req(struct pgpath *pgpath, struct request_queue *q, + struct request *rq) +{ + if (!q->mq_ops) + return blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, + GFP_ATOMIC); + + /* + * Even if BLK_MQ_REQ_ALLOC_NOTIFY is passed, we can't return + * BLK_STS_DEV_RESOURCE to blk-mq if this allocation fails because + * the notification can come before adding dm's request to + * the dispatch list, so we still need to return BLK_STS_RESOURCE to + * blk-mq, and the notification will run the queue and cancel the + * delay caused by BLK_STS_RESOURCE immediately. 
+ */ + return blk_mq_alloc_request_notify(q, rq->cmd_flags | REQ_NOMERGE, + BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_ALLOC_NOTIFY, + &pgpath->notifier.wait); +} + /* * Map cloned requests (request-based multipath) */ @@ -526,7 +569,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, bdev = pgpath->path.dev->bdev; q = bdev_get_queue(bdev); - clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC); + clone = multipath_get_req(pgpath, q, rq); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ if (blk_queue_dying(q)) { @@ -873,6 +916,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps int r; struct pgpath *p; struct multipath *m = ti->private; + struct mapped_device *md = dm_table_get_md((m)->ti->table); /* we need at least a path arg */ if (as->argc < 1) { @@ -906,6 +950,10 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps goto bad; } + p->notifier.q = dm_get_md_queue(md); + init_waitqueue_func_entry(&p->notifier.wait, req_notify); + INIT_LIST_HEAD(&p->notifier.wait.entry); + return p; bad: free_pgpath(p); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d6de00f367ef..ff06efcaf36e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -445,6 +445,7 @@ struct request_queue *dm_get_md_queue(struct mapped_device *md) { return md->queue; } +EXPORT_SYMBOL_GPL(dm_get_md_queue); struct dm_stats *dm_get_stats(struct mapped_device *md) { -- 2.9.5