Re: raid5:md3: kernel BUG , followed by , Silent halt .

"Dan Williams" <dan.j.williams@xxxxxxxxx> · Mon, 20 Aug 2007 10:29:49 -0700

On 8/18/07, Mr. James W. Laferriere <babydr@xxxxxxxxxxxxxxxx> wrote:
>         Hello All ,  Here we go again .  Again attempting to do bonnie++ testing
> on a small array .
>         Kernel 2.6.22.1
>         Patches involved ,
>         IOP1 ,  2.6.22.1-iop1 for improved sequential write performance
> (stripe-queue) ,  Dan Williams <dan.j.williams@xxxxxxxxx>

Hello James,

Thanks for the report.

I tried to reproduce this on my system, no luck.  However it looks
like their is a potential race between 'handle_queue' and
'add_queue_bio'.  The attached patch moves these critical sections
under spin_lock(&sq->lock), and adds some debugging output if this BUG
triggers.  It also includes a fix for retry_aligned_read which is
unrelated to this debug.

--
Dan
-------------------------------------------------------------------------------
raid5-fix-sq-locking.patch
-------------------------------------------------------------------------------
raid5: address potential sq->to_write race

From: Dan Williams <dan.j.williams@xxxxxxxxx>

synchronize reads and writes to the sq->to_write bit

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---

 drivers/md/raid5.c |   12 ++++++++----
 1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 02e313b..688b8d3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2289,10 +2289,14 @@ static int add_queue_bio(struct stripe_queue *sq, struct bio *bi, int dd_idx,
 	sh = sq->sh;
 	if (forwrite) {
 		bip = &sq->dev[dd_idx].towrite;
+		set_bit(dd_idx, sq->to_write);
 		if (*bip == NULL && (!sh || (sh && !sh->dev[dd_idx].written)))
 			firstwrite = 1;
-	} else
+	} else {
 		bip = &sq->dev[dd_idx].toread;
+		set_bit(dd_idx, sq->to_read);
+	}
+
 	while (*bip && (*bip)->bi_sector < bi->bi_sector) {
 		if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
 			goto overlap;
@@ -2324,7 +2328,6 @@ static int add_queue_bio(struct stripe_queue *sq, struct bio *bi, int dd_idx,
 		/* check if page is covered */
 		sector_t sector = sq->dev[dd_idx].sector;
 
-		set_bit(dd_idx, sq->to_write);
 		for (bi = sq->dev[dd_idx].towrite;
 		     sector < sq->dev[dd_idx].sector + STRIPE_SECTORS &&
 			     bi && bi->bi_sector <= sector;
@@ -2334,8 +2337,7 @@ static int add_queue_bio(struct stripe_queue *sq, struct bio *bi, int dd_idx,
 		}
 		if (sector >= sq->dev[dd_idx].sector + STRIPE_SECTORS)
 			set_bit(dd_idx, sq->overwrite);
-	} else
-		set_bit(dd_idx, sq->to_read);
+	}
 
 	return 1;
 
@@ -3656,6 +3658,7 @@ static void handle_queue(struct stripe_queue *sq, int disks, int data_disks)
 	struct stripe_head *sh = NULL;
 
 	/* continue to process i/o while the stripe is cached */
+	spin_lock(&sq->lock);
 	if (test_bit(STRIPE_QUEUE_HANDLE, &sq->state)) {
 		if (io_weight(sq->overwrite, disks) == data_disks) {
 			set_bit(STRIPE_QUEUE_IO_HI, &sq->state);
@@ -3678,6 +3681,7 @@ static void handle_queue(struct stripe_queue *sq, int disks, int data_disks)
 		 */
 		BUG_ON(!(sq->sh && sq->sh == sh));
 	}
+	spin_unlock(&sq->lock);
 
 	release_queue(sq);
 	if (sh) {
-------------------------------------------------------------------------------
raid5-debug-init_queue-bugs.patch
-------------------------------------------------------------------------------
raid5: printk instead of BUG in init_queue

From: Dan Williams <dan.j.williams@xxxxxxxxx>

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---

 drivers/md/raid5.c |   19 +++++++++++++------
 1 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 688b8d3..7164011 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -557,12 +557,19 @@ static void init_queue(struct stripe_queue *sq, sector_t sector,
 		__FUNCTION__, (unsigned long long) sq->sector,
 		(unsigned long long) sector, sq);
 
-	BUG_ON(atomic_read(&sq->count) != 0);
-	BUG_ON(io_weight(sq->to_read, disks));
-	BUG_ON(io_weight(sq->to_write, disks));
-	BUG_ON(io_weight(sq->overwrite, disks));
-	BUG_ON(test_bit(STRIPE_QUEUE_HANDLE, &sq->state));
-	BUG_ON(sq->sh);
+	if ((atomic_read(&sq->count) != 0) || io_weight(sq->to_read, disks) ||
+	    io_weight(sq->to_write, disks) || io_weight(sq->overwrite, disks) ||
+	    test_bit(STRIPE_QUEUE_HANDLE, &sq->state) || sq->sh) {
+		printk(KERN_ERR "%s: sector=%llx count: %d to_read: %lu "
+				"to_write: %lu overwrite: %lu state: %lx "
+				"sq->sh: %p\n", __FUNCTION__,
+				(unsigned long long) sq->sector,
+				atomic_read(&sq->count),
+				io_weight(sq->to_read, disks),
+				io_weight(sq->to_write, disks),
+				io_weight(sq->overwrite, disks),
+				sq->state, sq->sh);
+	}
 
 	sq->state = (1 << STRIPE_QUEUE_HANDLE);
 	sq->sector = sector;
-------------------------------------------------------------------------------
raid5-fix-get_active_queue-bug.patch
-------------------------------------------------------------------------------
raid5: fix get_active_queue bug in retry_aligned_read

From: Dan Williams <dan.j.williams@xxxxxxxxx>

Check for a potential null return from get_active_queue

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---

 drivers/md/raid5.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7164011..5fec942 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4459,7 +4459,8 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 			continue;
 
 		sq = get_active_queue(conf, sector, disks, pd_idx, 1);
-		sh = get_active_stripe(sq, disks, 1);
+		if (sq)
+			sh = get_active_stripe(sq, disks, 1);
 		if (!(sq && sh)) {
 			/* failed to get a queue/stripe - must wait */
 			raid_bio->bi_hw_segments = scnt;