Re: Bad/Unaligned block number requested

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2018/5/6 3:28 AM, Eric Wheeler wrote:
> On Sun, 6 May 2018, Coly Li wrote:
> 
>> On 2018/4/27 4:32 AM, Eric Wheeler wrote:
>>>> Hi Coly,
>>>>
>>>> I'm sure you've been busy with the v4.17 merge but I thought I 
>>>> would check in:  
>>>>
>>>> Have you had a chance to look at this?  It is an opportunity to fix this 
>>>> 4k bug for the future since we can still reproduce the error.
>>>>
>>>>>>> bcache: bch_count_io_errors() dm-6: IO error on reading from cache, recovering <<<
>>>>
>>>> What do you think, is there data corruption exposure here since 4.1.49 
>>>> still has the dirty-cache-recovery bug? 
>>>
>>> I just noticed that "bcache: only permit to recovery read error when cache 
>>> device is clean" is in v4.1.49, but would this recover gracefully in the 
>>> 4k error situation?
>>>
>>>> Also, would your failure-recovery patch series address this type of 
>>>> failure?
>>
>> Hi Eric,
>>
>> I just find a time slot to compose a patch checking 4K alignment of I/Os
>> to backing device. After testing and first glance at the messages, I
>> will post it out.
>>
> 
> Thank you Coly, I appreciate your help!  

Hi Eric,

Please check the attached patch. It checks the bcache key offset; if the
offset is not 4KB aligned, a call trace will be printed out.

I also ran it on my own hardware; here is some information I can share.

It seems bcache simply caches bios at any offset, with no 4K alignment
required. When I use fio with direct I/O, non-4K-aligned bios can be sent
into the bcache code, and they are just cached.

I can see numerous call traces when I use direct I/O with a 512B/1KB/2KB
block size. But if I use a 4KB block size in fio, or set the block
alignment to 4KB, no warning call trace is printed at all.

Then, when I set the block alignment to 2K in fio, even though the
blocksize is 4KB, I can see the non-4K-aligned warning.

Therefore I guess the most probable reason is that the upper layer code
sends non-4K-aligned bios into the bcache code.

I also tried setting the bcache block size to 4K with make-bcache -w;
when the fio blocksize is >= 4KB, there is no non-4K-aligned warning. But
fio does not work if its blocksize is smaller than the bcache block size,
so I am not sure whether setting the bcache block size to 4K works for
your situation.

Just for your information.

Coly Li
From 4fca0f5fa2504c85d397590b9429e30bcbd253db Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@xxxxxxx>
Date: Sun, 6 May 2018 00:05:21 +0800
Subject: [PATCH] bcache: check 4K alignment for KEY_OFFSET() and KEY_START()

If KEY_OFFSET() or KEY_START() of a key is not 4KB aligned, dump a call
trace. The information may provide some clue as to why some special I/Os
to the backing device are not 4KB aligned.

Signed-off-by: Coly Li <colyli@xxxxxxx>
---
 drivers/md/bcache/alloc.c     |  3 +++
 drivers/md/bcache/bcache.h    |  8 ++++++++
 drivers/md/bcache/bset.c      |  6 ++++--
 drivers/md/bcache/btree.c     |  9 ++++++---
 drivers/md/bcache/extents.c   | 14 ++++++++++++++
 drivers/md/bcache/movinggc.c  |  2 ++
 drivers/md/bcache/request.c   |  2 ++
 drivers/md/bcache/writeback.c | 10 ++++++++--
 8 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8eeab72b93e2..b16f3b46ec62 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -616,6 +616,9 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
 
 	sectors = min(sectors, b->sectors_free);
 
+	check_4k_alignment(KEY_OFFSET(k), "KEY_OFFSET(k)");
+	check_4k_alignment(KEY_OFFSET(k) + sectors, "KEY_OFFSET(k) + sectors");
+
 	SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
 	SET_KEY_SIZE(k, sectors);
 	SET_KEY_PTRS(k, KEY_PTRS(&b->key));
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6b420a55c745..c9ffef3cddd0 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -847,6 +847,15 @@ static inline void wake_up_allocators(struct cache_set *c)
 		wake_up_process(ca->alloc_thread);
 }
 
+/* Dump a call trace when a sector count is not 4KB (8-sector) aligned. */
+static inline void check_4k_alignment(size_t sectors, const char *str)
+{
+	/* 4KB == 8 x 512-byte sectors, so any of the low 3 bits set
+	 * means the offset/size is not 4KB aligned.
+	 */
+	if (unlikely(sectors & 7)) {
+		/* %zu is the correct printk specifier for size_t;
+		 * the trailing \n keeps this from merging with the
+		 * next log line.
+		 */
+		pr_err("%s is not 4KB aligned: %zu\n", str, sectors);
+		dump_stack();
+	}
+}
+
 /* Forward declarations */
 
 void bch_count_io_errors(struct cache *, int, const char *);
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 646fe85261c1..e6f9b9579d05 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -9,6 +9,7 @@
 
 #include "util.h"
 #include "bset.h"
+#include "bcache.h"
 
 #include <linux/console.h>
 #include <linux/random.h>
@@ -219,9 +220,10 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 
 	BUG_ON(KEY_INODE(where) != KEY_INODE(k));
 
-	if (bkey_cmp(where, &START_KEY(k)) > 0)
+	if (bkey_cmp(where, &START_KEY(k)) > 0) {
+		check_4k_alignment(KEY_START(k), "KEY_START(k)");
 		len = KEY_OFFSET(where) - KEY_START(k);
-
+	}
 	bkey_copy_key(k, where);
 
 	BUG_ON(len > KEY_SIZE(k));
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 22b9e34ceb75..246fa3a5a71f 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1931,7 +1931,6 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
 		if (bkey_cmp(k, &b->key) <= 0) {
 			if (!b->level)
 				bkey_put(b->c, k);
-
 			ret |= btree_insert_key(b, k, replace_key);
 			bch_keylist_pop_front(insert_keys);
 		} else if (bkey_cmp(&START_KEY(k), &b->key) < 0) {
@@ -2320,10 +2319,11 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
 			return ret;
 	}
 
-	if (!b->level && (flags & MAP_END_KEY))
+	if (!b->level && (flags & MAP_END_KEY)) {
+		check_4k_alignment(KEY_OFFSET(&b->key), "KEY_OFFSET(&b->key)");
 		ret = fn(op, b, &KEY(KEY_INODE(&b->key),
 				     KEY_OFFSET(&b->key), 0));
-
+	}
 	return ret;
 }
 
@@ -2420,6 +2420,9 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 	bch_btree_map_keys(&refill.op, c, &buf->last_scanned,
 			   refill_keybuf_fn, MAP_END_KEY);
 
+	check_4k_alignment(KEY_OFFSET(&start), "KEY_OFFSET(&start)");
+	check_4k_alignment(KEY_OFFSET(&buf->last_scanned), "KEY_OFFSET(&buf->last_scanned)");
+
 	trace_bcache_keyscan(refill.nr_found,
 			     KEY_INODE(&start), KEY_OFFSET(&start),
 			     KEY_INODE(&buf->last_scanned),
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 243de0bf15cd..3df03f401765 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -357,6 +357,11 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
 		 * operations.
 		 */
 
+		check_4k_alignment(KEY_START(k), "KEY_START(k)");
+		check_4k_alignment(KEY_START(insert), "KEY_START(insert)");
+		if (replace_key)
+			check_4k_alignment(KEY_START(replace_key), "KEY_START(replace_key)");
+
 		if (replace_key && KEY_SIZE(k)) {
 			/*
 			 * k might have been split since we inserted/found the
@@ -366,6 +371,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
 			uint64_t offset = KEY_START(k) -
 				KEY_START(replace_key);
 
+
 			/* But it must be a subset of the replace key */
 			if (KEY_START(k) < KEY_START(replace_key) ||
 			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
@@ -460,6 +466,13 @@ check_failed:
 		if (!sectors_found) {
 			return true;
 		} else if (sectors_found < KEY_SIZE(insert)) {
+			check_4k_alignment(KEY_OFFSET(insert), "KEY_OFFSET(insert)");
+			check_4k_alignment((KEY_SIZE(insert) - sectors_found),
+					"(KEY_SIZE(insert) - sectors_found)");
+			check_4k_alignment(KEY_OFFSET(insert) -
+					   (KEY_SIZE(insert) - sectors_found),
+					"KEY_OFFSET(insert) - (KEY_SIZE(insert) - sectors_found)");
+
 			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
 				       (KEY_SIZE(insert) - sectors_found));
 			SET_KEY_SIZE(insert, sectors_found);
@@ -606,6 +619,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
 			SET_KEY_CSUM(l, 0);
 	}
 
+	check_4k_alignment(KEY_OFFSET(l) + KEY_SIZE(r), "KEY_OFFSET(l) + KEY_SIZE(r)");
 	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
 	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
 
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index b929fc944e9c..05b8132b42b3 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -100,6 +100,8 @@ static void write_moving(struct closure *cl)
 	if (!op->error) {
 		moving_init(io);
 
+		check_4k_alignment(KEY_START(&io->w->key),
+				"KEY_START(&io->w->key)");
 		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
 		op->write_prio		= 1;
 		op->bio			= &io->bio.bio;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 25fa8445bb24..370668976c59 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -515,6 +515,8 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
 		return MAP_CONTINUE;
 
+	check_4k_alignment(KEY_START(k), "KEY_START(k)");
+
 	if (KEY_INODE(k) != s->iop.inode ||
 	    KEY_START(k) > bio->bi_iter.bi_sector) {
 		unsigned bio_sectors = bio_sectors(bio);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index b9346cd9cda1..ce93357e4c54 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -184,6 +184,7 @@ static void write_dirty(struct closure *cl)
 
 	dirty_init(w);
 	io->bio.bi_rw		= WRITE;
+	check_4k_alignment(KEY_START(&w->key), "KEY_START(&w->key)");
 	io->bio.bi_iter.bi_sector = KEY_START(&w->key);
 	io->bio.bi_bdev		= io->dc->bdev;
 	io->bio.bi_end_io	= dirty_endio;
@@ -236,11 +237,13 @@ static void read_dirty(struct cached_dev *dc)
 
 		BUG_ON(ptr_stale(dc->disk.c, &w->key, 0));
 
+		check_4k_alignment(KEY_START(&w->key), "KEY_START(&w->key)");
 		if (KEY_START(&w->key) != dc->last_read ||
 		    jiffies_to_msecs(delay) > 50)
 			while (!kthread_should_stop() && delay)
 				delay = schedule_timeout_interruptible(delay);
 
+		check_4k_alignment(KEY_OFFSET(&w->key), "KEY_OFFSET(&w->key)");
 		dc->last_read	= KEY_OFFSET(&w->key);
 
 		io = kzalloc(sizeof(struct dirty_io) + sizeof(struct bio_vec)
@@ -336,6 +339,8 @@ static void refill_full_stripes(struct cached_dev *dc)
 	unsigned start_stripe, stripe, next_stripe;
 	bool wrapped = false;
 
+	check_4k_alignment(KEY_OFFSET(&buf->last_scanned),
+			"KEY_OFFSET(&buf->last_scanned)");
 	stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
 
 	if (stripe >= dc->disk.nr_stripes)
@@ -481,10 +486,11 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
 	if (KEY_INODE(k) > op->inode)
 		return MAP_DONE;
 
-	if (KEY_DIRTY(k))
+	if (KEY_DIRTY(k)) {
+		check_4k_alignment(KEY_START(k), "KEY_START(k)");
 		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
 					     KEY_START(k), KEY_SIZE(k));
-
+	}
 	return MAP_CONTINUE;
 }
 
-- 
2.16.3


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux