On Tue, Jul 31, 2018 at 01:51:22PM -0400, Peter Geis wrote: > > > On 07/31/2018 12:25 PM, Ming Lei wrote: > > Hi Peter, > > > > On Tue, Jul 31, 2018 at 08:47:45AM -0400, Peter Geis wrote: > > > Good Morning, > > > > > > On 07/30/2018 09:38 PM, Ming Lei wrote: > > > > Hi Peter, > > > > > > > > Thanks for collecting the log. > > > > > > > > On Mon, Jul 30, 2018 at 02:55:42PM -0400, Peter Geis wrote: > > > > > > > > > > > > > > > On 07/28/2018 09:37 AM, Ming Lei wrote: > > > > > > > > ... > > > > > > > > > [ 10.887209] systemd--112 0.n.1 2411122us : blk_mq_make_request: make > > > > > rq -1 > > > > > [ 10.890274] kworker/-98 0...1 2411506us : blk_mq_free_request: > > > > > complete: rq -1 > > > > > [ 10.893313] systemd--107 0...1 2412025us : blk_mq_make_request: make > > > > > rq -1 > > > > > [ 10.896354] systemd--107 0.... 2412323us : mmc_mq_queue_rq: queue rq > > > > > -1, 0 > > > > > [ 10.899388] systemd--107 0.... 2412327us : > > > > > blk_mq_try_issue_list_directly: issue direct: rq -1, ret 0 > > > > > [ 10.902463] (direxec-111 1...1 2413829us : blk_mq_make_request: make > > > > > rq -1 > > > > > [ 10.905513] systemd--114 1...1 2415159us : blk_mq_make_request: make > > > > > rq -1 > > > > > > > > Above is the most interesting part in the log. MMC sets hw queue depth > > > > as 1, and you are using none scheduler, that means the max number of > > > > in-flight requests should be one, but the above log shows that there may > > > > be 3 in-flight requests. > > > > > > That's odd, I have CFQ set as the default, is something changing this during > > > boot? > > > CONFIG_CFQ_GROUP_IOSCHED=y > > > CONFIG_DEFAULT_IOSCHED="cfq" > > > > No, now mmc has been converted to blk-mq, and the default mq io sched > > should have been mq-deadline for mmc, but not sure why it is none in > > your case. > > Thanks, you just answered the question. > It would be helpful if CONFIG_MQ_IOSCHED_DEADLINE=y is set. > > Would you like to continue tracking down why if that is unset or the module > is unavailable the whole thing breaks? Sure, I already sent you one simple debug patch in last email, which might show where the issue is. And up to now, the issue is highly related with mmc's internal sync. Could you apply the attached debug patch and collect the trace log again? Thanks, Ming
>From a713d76deebab1d8a9e1e25db6ab0eabea3032bd Mon Sep 17 00:00:00 2001 From: Ming Lei <ming.lei@xxxxxxxxxx> Date: Tue, 31 Jul 2018 08:31:58 +0800 Subject: [PATCH] mmc-blk-mq-debug: debug info Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- block/blk-mq-sched.c | 2 ++ block/blk-mq.c | 8 ++++++++ drivers/mmc/core/queue.c | 18 ++++++++++++++++++ drivers/mmc/host/sdhci.c | 1 + include/linux/blkdev.h | 1 + 5 files changed, 30 insertions(+) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index cf9c66c6d35a..6573684b07e5 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -415,6 +415,8 @@ void blk_mq_sched_insert_requests(struct request_queue *q, blk_mq_try_issue_list_directly(hctx, list); if (list_empty(list)) return; + if (blk_queue_debug(q)) + trace_printk("issue direct: partial done\n"); } blk_mq_insert_requests(hctx, ctx, list); } diff --git a/block/blk-mq.c b/block/blk-mq.c index e13bdc2707ce..fe807cd1f9ef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -488,6 +488,9 @@ void blk_mq_free_request(struct request *rq) struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + if (blk_queue_debug(q)) + trace_printk("complete: rq %d %d\n", rq->internal_tag, rq->tag); + if (rq->rq_flags & RQF_ELVPRIV) { if (e && e->type->ops.mq.finish_request) e->type->ops.mq.finish_request(rq); @@ -1793,6 +1796,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, list_del_init(&rq->queuelist); ret = blk_mq_request_issue_directly(rq); + if (blk_queue_debug(rq->q)) + trace_printk("issue direct: rq %d %d, ret %d\n", + rq->internal_tag, rq->tag, (int)ret); if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { @@ -1841,6 +1847,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } + if (blk_queue_debug(q)) + trace_printk("make rq %d %d\n", rq->internal_tag, rq->tag); rq_qos_track(q, rq, bio); cookie = request_to_qc_t(data.hctx, rq); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 648eb6743ed5..ac2ffc5a8ed4 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -257,6 +257,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (mmc_card_removed(mq->card)) { req->rq_flags |= RQF_QUIET; + + if (blk_queue_debug(q)) + trace_printk("queue rq %d %d, %s\n", + req->internal_tag, req->tag, "io err"); return BLK_STS_IOERR; } @@ -266,6 +270,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (mq->recovery_needed) { spin_unlock_irq(q->queue_lock); + if (blk_queue_debug(q)) + trace_printk("queue rq %d %d, %s\n", + req->internal_tag, req->tag, "resource"); return BLK_STS_RESOURCE; } @@ -274,6 +281,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (mmc_cqe_dcmd_busy(mq)) { mq->cqe_busy |= MMC_CQE_DCMD_BUSY; spin_unlock_irq(q->queue_lock); + if (blk_queue_debug(q)) + trace_printk("queue rq %d %d, %s\n", + req->internal_tag, req->tag, + "resource"); return BLK_STS_RESOURCE; } break; @@ -310,6 +321,8 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, !host->hold_retune; } + if (blk_queue_debug(q)) + trace_printk("mmc before issue rq %d %d\n", req->internal_tag, req->tag); blk_mq_start_request(req); issued = mmc_blk_mq_issue_rq(mq, req); @@ -326,6 +339,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, break; } + if (blk_queue_debug(q)) + trace_printk("queue rq %d %d, %d\n", + req->internal_tag, req->tag, (int)ret); if (issued != MMC_REQ_STARTED) { bool put_card = false; @@ -404,6 +420,8 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth, mq->queue->queue_lock = lock; mq->queue->queuedata = mq; + blk_queue_flag_set(31, mq->queue); + return 0; free_tag_set: diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 1c828e0e9905..b2b471c2f5f6 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2640,6 +2640,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) return; pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n", mmc_hostname(host->mmc), (unsigned)intmask); + ftrace_dump(DUMP_ALL); sdhci_dumpregs(host); return; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 050d599f5ea9..29be3a6d5903 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -741,6 +741,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_preempt_only(q) \ test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) +#define blk_queue_debug(q) test_bit(31, &(q)->queue_flags) extern int blk_set_preempt_only(struct request_queue *q); extern void blk_clear_preempt_only(struct request_queue *q); -- 2.9.5