Ira Weiny wrote: > Ira Weiny wrote: > > Coly, > > > > Yesterday I noticed that a few of our nvdimm tests were failing. I bisected > > the problem to the following commit. > > > > aa511ff8218b ("badblocks: switch to the improved badblock handling code") > > > > Reverting this patch fixed our tests. > > [snip] I added some prints[1] to try and see what is happening. Perhaps this will help you. ... [ 99.919237] IKW set_badblock 00000000aa44c55d 8000 1 [ 99.921448] IKW set_badblock 00000000aa44c55d 8001 1 [ 99.924051] IKW set_badblock 00000000aa44c55d 8002 1 [ 99.926135] IKW set_badblock 00000000aa44c55d 8003 1 [ 99.928516] IKW set_badblock 00000000aa44c55d 8004 1 [ 99.930491] IKW set_badblock 00000000aa44c55d 8005 1 [ 99.932894] IKW set_badblock 00000000aa44c55d 8006 1 [ 99.936638] IKW set_badblock 00000000aa44c55d 8007 1 [ 100.999297] IKW _badblocks_check() 00000000aa44c55d s 8000 num 1 [ 101.000027] IKW table count 1 shift 0 [ 101.000644] IKW 0: off 8000 end 8008 [ 101.001271] IKW prev 0, cnt 1 [ 101.002481] IKW start 8000, len 1 [ 101.003464] IKW front overlap 0 [ 101.004256] IKW rv 1 ... ^^^^^^^^^ <This is a valid failure as part of the test> ... 
[ 101.148783] IKW set_badblock 00000000721b4f3d 8000 1 [ 101.150629] IKW set_badblock 00000000721b4f3d 8001 1 [ 101.152315] IKW set_badblock 00000000721b4f3d 8002 1 [ 101.154544] IKW set_badblock 00000000721b4f3d 8003 1 [ 101.156238] IKW set_badblock 00000000721b4f3d 8004 1 [ 101.158310] IKW set_badblock 00000000721b4f3d 8005 1 [ 101.160196] IKW set_badblock 00000000721b4f3d 8006 1 [ 101.162158] IKW set_badblock 00000000721b4f3d 8007 1 [ 101.163543] IKW _badblocks_check() 00000000721b4f3d s 0 num 8 [ 101.164427] IKW table count 1 shift 0 [ 101.165310] IKW 0: off 8000 end 8008 [ 101.166398] IKW prev -1, cnt 1 [ 101.167178] IKW start 0, len 8 [ 101.168107] IKW rv 0 [ 101.168858] IKW _badblocks_check() 00000000721b4f3d s 8 num 8 [ 101.169814] IKW table count 1 shift 0 [ 101.170547] IKW 0: off 8000 end 8008 [ 101.171238] IKW prev -1, cnt 1 [ 101.171985] IKW start 8, len 8 [ 101.173007] IKW front overlap -1 <== this is prev, which is used to index bb->page [ 101.174157] IKW prev -1, cnt 1 [ 101.175268] IKW start 9, len 7 [ 101.176557] IKW rv -1 ... ^^^^^^^^^ This is where the failure occurs. ... I think overlap_front() is not working correctly in this case (prev is -1, yet it reports a front overlap). And from my reading of the code I don't see how it could work correctly with prev == -1. But overlap_front() is used elsewhere and I'm not confident in making the change myself. 
Hope this helps, Ira [1] diff --git a/block/badblocks.c b/block/badblocks.c index fc92d4e18aa3..21e22ee576e5 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -1280,6 +1280,16 @@ static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors, unsigned int seq; int len, rv; u64 *p; + int i; + + printk(KERN_CRIT "IKW %s() %p s %llx num %d\n", __func__, + bb, s, sectors); + + printk(KERN_CRIT " IKW table count %d shift %d\n", bb->count, bb->shift); + for (i = 0; i < bb->count; i++) { + printk(KERN_CRIT " IKW %d: off %llx end %llx\n", i, + BB_OFFSET(bb->page[i]), BB_END(bb->page[i])); + } WARN_ON(bb->shift < 0 || sectors == 0); @@ -1311,6 +1321,9 @@ static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors, prev = prev_badblocks(bb, &bad, hint); + printk(KERN_CRIT " IKW prev %d, cnt %d\n", prev, bb->count); + printk(KERN_CRIT " IKW start %llx, len %llx\n", bad.start, bad.len); + /* start after all badblocks */ if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) { len = sectors; @@ -1318,6 +1331,7 @@ static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors, } if (overlap_front(bb, prev, &bad)) { + printk(KERN_CRIT " IKW front overlap %d\n", prev); if (BB_ACK(p[prev])) acked_badblocks++; else @@ -1365,6 +1379,7 @@ static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors, if (read_seqretry(&bb->lock, seq)) goto retry; + printk(KERN_CRIT "IKW rv %d\n", rv); return rv; } diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c index a002ea6fdd84..93ffd189bc75 100644 --- a/drivers/nvdimm/badrange.c +++ b/drivers/nvdimm/badrange.c @@ -167,6 +167,7 @@ static void set_badblock(struct badblocks *bb, sector_t s, int num) dev_dbg(bb->dev, "Found a bad range (0x%llx, 0x%llx)\n", (u64) s * 512, (u64) num * 512); /* this isn't an error as the hardware will still throw an exception */ + printk(KERN_CRIT "IKW %s %p %llx %x\n", __func__, bb, s, num); if (badblocks_set(bb, s, num, 1)) 
dev_info_once(bb->dev, "%s: failed for sector %llx\n", __func__, (u64) s);