Re: [Regression] Linux-Next Merge 25Jul2018 breaks mmc on Tegra.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 25, 2018 at 02:29:30PM -0400, Peter Geis wrote:
> On 07/25/2018 02:17 PM, Jens Axboe wrote:
> > On 7/25/18 10:28 AM, Peter Geis wrote:
> > > Good Afternoon,
> > > 
> > > I have encountered an issue on both Tegra 2 and Tegra 3 devices
> > > accessing emmc following the 25 July 2018 remote tracking merge.
> > > 
> > > The offending commit is:
> > > 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
> > > blk-mq: issue directly if hw queue isn't busy in case of 'none'.
> > 
> > Can you try my current for-next? This should fix it:
> > 
> > commit 8824f62246bef288173a6624a363352f0d4d3b09
> > Author: Ming Lei <ming.lei@xxxxxxxxxx>
> > Date:   Sun Jul 22 14:10:15 2018 +0800
> > 
> >      blk-mq: fail the request in case issue failure
> > 
> 
> That commit made the current merge window, it must be reverted before
> reverting the offending commit.
> 
> With that patch, the bug triggers then the kernel waits for the mmc to
> recover. It seems however that the bug leaves the mmc in a zombie state,
> where it is processing the previous command but the kernel has no control
> over it.
> 
> [    4.233073] mmc0: Got command interrupt 0x00000001 even though no command
> operation was in progress.
> [    4.242189] mmc0: sdhci: ============ SDHCI REGISTER DUMP ===========
> [    4.248616] mmc0: sdhci: Sys addr:  0x00000000 | Version:  0x00000001
> [    4.255041] mmc0: sdhci: Blk size:  0x00007200 | Blk cnt:  0x00000000
> [    4.261465] mmc0: sdhci: Argument:  0x002e3b10 | Trn mode: 0x00000033
> [    4.267890] mmc0: sdhci: Present:   0x1ff70000 | Host ctl: 0x00000031
> [    4.274314] mmc0: sdhci: Power:     0x0000000f | Blk gap:  0x00000000
> [    4.280737] mmc0: sdhci: Wake-up:   0x00000000 | Clock:    0x00000007
> [    4.287162] mmc0: sdhci: Timeout:   0x0000000e | Int stat: 0x00000002
> [    4.293586] mmc0: sdhci: Int enab:  0x02ff000b | Sig enab: 0x02fc000b
> [    4.300010] mmc0: sdhci: AC12 err:  0x00000000 | Slot int: 0x00000000
> [    4.306433] mmc0: sdhci: Caps:      0xe7ffd080 | Caps_1:   0x00000074
> [    4.312857] mmc0: sdhci: Cmd:       0x0000123a | Max curr: 0x00969696
> [    4.319281] mmc0: sdhci: Resp[0]:   0x00000900 | Resp[1]:  0x04800e92
> [    4.325705] mmc0: sdhci: Resp[2]:   0x074b8000 | Resp[3]:  0x00000240
> [    4.332128] mmc0: sdhci: Host ctl2: 0x00000000
> [    4.336560] mmc0: sdhci: ADMA Err:  0x00000000 | ADMA Ptr: 0xae2f9220
> [    4.342981] mmc0: sdhci: ============================================

Not found issue in the two merged patches, and it might be one mmc
internal issue, could you apply the following debug patch and post
the console log?

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index cf9c66c6d35a..6573684b07e5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -415,6 +415,8 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 			blk_mq_try_issue_list_directly(hctx, list);
 			if (list_empty(list))
 				return;
+			if (blk_queue_debug(q))
+				trace_printk("issue direct: partial done\n");
 		}
 		blk_mq_insert_requests(hctx, ctx, list);
 	}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e13bdc2707ce..efb898323e2d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -488,6 +488,9 @@ void blk_mq_free_request(struct request *rq)
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 
+	if (blk_queue_debug(q))
+		trace_printk("complete: rq %d\n", rq->internal_tag);
+
 	if (rq->rq_flags & RQF_ELVPRIV) {
 		if (e && e->type->ops.mq.finish_request)
 			e->type->ops.mq.finish_request(rq);
@@ -1793,6 +1796,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 
 		list_del_init(&rq->queuelist);
 		ret = blk_mq_request_issue_directly(rq);
+		if (blk_queue_debug(rq->q))
+			trace_printk("issue direct: rq %d, ret %d\n",
+					rq->internal_tag, (int)ret);
 		if (ret != BLK_STS_OK) {
 			if (ret == BLK_STS_RESOURCE ||
 					ret == BLK_STS_DEV_RESOURCE) {
@@ -1841,6 +1847,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
+	 if (blk_queue_debug(q))
+		 trace_printk("make rq %d\n", rq->internal_tag);
 	rq_qos_track(q, rq, bio);
 
 	cookie = request_to_qc_t(data.hctx, rq);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 648eb6743ed5..90745829cf9a 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -257,6 +257,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (mmc_card_removed(mq->card)) {
 		req->rq_flags |= RQF_QUIET;
+
+		if (blk_queue_debug(q))
+			trace_printk("queue rq %d, %s\n", req->internal_tag, "io err");
 		return BLK_STS_IOERR;
 	}
 
@@ -266,6 +269,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (mq->recovery_needed) {
 		spin_unlock_irq(q->queue_lock);
+		if (blk_queue_debug(q))
+			trace_printk("queue rq %d, %s\n", req->internal_tag,
+					"resource");
 		return BLK_STS_RESOURCE;
 	}
 
@@ -274,6 +280,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		if (mmc_cqe_dcmd_busy(mq)) {
 			mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
 			spin_unlock_irq(q->queue_lock);
+			if (blk_queue_debug(q))
+				trace_printk("queue rq %d, %s\n", req->internal_tag,
+					"resource");
 			return BLK_STS_RESOURCE;
 		}
 		break;
@@ -326,6 +335,8 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		break;
 	}
 
+	if (blk_queue_debug(q))
+		trace_printk("queue rq %d, %d\n", req->internal_tag, (int)ret);
 	if (issued != MMC_REQ_STARTED) {
 		bool put_card = false;
 
@@ -404,6 +415,8 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
 	mq->queue->queue_lock = lock;
 	mq->queue->queuedata = mq;
 
+	blk_queue_flag_set(31, mq->queue);
+
 	return 0;
 
 free_tag_set:
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 1c828e0e9905..b2b471c2f5f6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2640,6 +2640,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
 			return;
 		pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n",
 		       mmc_hostname(host->mmc), (unsigned)intmask);
+		ftrace_dump(DUMP_ALL);
 		sdhci_dumpregs(host);
 		return;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 050d599f5ea9..29be3a6d5903 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -741,6 +741,7 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_preempt_only(q)				\
 	test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
+#define blk_queue_debug(q)	test_bit(31, &(q)->queue_flags)
 
 extern int blk_set_preempt_only(struct request_queue *q);
 extern void blk_clear_preempt_only(struct request_queue *q);

Thanks,
Ming



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux