On 8/18/07, Mr. James W. Laferriere <babydr@xxxxxxxxxxxxxxxx> wrote: > Hello All , Here we go again . Again attempting to do bonnie++ testing > on a small array . > Kernel 2.6.22.1 > Patches involved , > IOP1 , 2.6.22.1-iop1 for improved sequential write performance > (stripe-queue) , Dan Williams <dan.j.williams@xxxxxxxxx> Hello James, Thanks for the report. I tried to reproduce this on my system, no luck. However it looks like their is a potential race between 'handle_queue' and 'add_queue_bio'. The attached patch moves these critical sections under spin_lock(&sq->lock), and adds some debugging output if this BUG triggers. It also includes a fix for retry_aligned_read which is unrelated to this debug. -- Dan
------------------------------------------------------------------------------- raid5-fix-sq-locking.patch ------------------------------------------------------------------------------- raid5: address potential sq->to_write race From: Dan Williams <dan.j.williams@xxxxxxxxx> synchronize reads and writes to the sq->to_write bit Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- drivers/md/raid5.c | 12 ++++++++---- 1 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 02e313b..688b8d3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2289,10 +2289,14 @@ static int add_queue_bio(struct stripe_queue *sq, struct bio *bi, int dd_idx, sh = sq->sh; if (forwrite) { bip = &sq->dev[dd_idx].towrite; + set_bit(dd_idx, sq->to_write); if (*bip == NULL && (!sh || (sh && !sh->dev[dd_idx].written))) firstwrite = 1; - } else + } else { bip = &sq->dev[dd_idx].toread; + set_bit(dd_idx, sq->to_read); + } + while (*bip && (*bip)->bi_sector < bi->bi_sector) { if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) goto overlap; @@ -2324,7 +2328,6 @@ static int add_queue_bio(struct stripe_queue *sq, struct bio *bi, int dd_idx, /* check if page is covered */ sector_t sector = sq->dev[dd_idx].sector; - set_bit(dd_idx, sq->to_write); for (bi = sq->dev[dd_idx].towrite; sector < sq->dev[dd_idx].sector + STRIPE_SECTORS && bi && bi->bi_sector <= sector; @@ -2334,8 +2337,7 @@ static int add_queue_bio(struct stripe_queue *sq, struct bio *bi, int dd_idx, } if (sector >= sq->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(dd_idx, sq->overwrite); - } else - set_bit(dd_idx, sq->to_read); + } return 1; @@ -3656,6 +3658,7 @@ static void handle_queue(struct stripe_queue *sq, int disks, int data_disks) struct stripe_head *sh = NULL; /* continue to process i/o while the stripe is cached */ + spin_lock(&sq->lock); if (test_bit(STRIPE_QUEUE_HANDLE, &sq->state)) { if (io_weight(sq->overwrite, disks) == data_disks) { set_bit(STRIPE_QUEUE_IO_HI, &sq->state); @@ -3678,6 +3681,7 @@ static void handle_queue(struct stripe_queue *sq, int disks, int data_disks) */ BUG_ON(!(sq->sh && sq->sh == sh)); } + spin_unlock(&sq->lock); release_queue(sq); if (sh) { ------------------------------------------------------------------------------- raid5-debug-init_queue-bugs.patch ------------------------------------------------------------------------------- raid5: printk instead of BUG in init_queue From: Dan Williams <dan.j.williams@xxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- drivers/md/raid5.c | 19 +++++++++++++------ 1 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 688b8d3..7164011 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -557,12 +557,19 @@ static void init_queue(struct stripe_queue *sq, sector_t sector, __FUNCTION__, (unsigned long long) sq->sector, (unsigned long long) sector, sq); - BUG_ON(atomic_read(&sq->count) != 0); - BUG_ON(io_weight(sq->to_read, disks)); - BUG_ON(io_weight(sq->to_write, disks)); - BUG_ON(io_weight(sq->overwrite, disks)); - BUG_ON(test_bit(STRIPE_QUEUE_HANDLE, &sq->state)); - BUG_ON(sq->sh); + if ((atomic_read(&sq->count) != 0) || io_weight(sq->to_read, disks) || + io_weight(sq->to_write, disks) || io_weight(sq->overwrite, disks) || + test_bit(STRIPE_QUEUE_HANDLE, &sq->state) || sq->sh) { + printk(KERN_ERR "%s: sector=%llx count: %d to_read: %lu " + "to_write: %lu overwrite: %lu state: %lx " + "sq->sh: %p\n", __FUNCTION__, + (unsigned long long) sq->sector, + atomic_read(&sq->count), + io_weight(sq->to_read, disks), + io_weight(sq->to_write, disks), + io_weight(sq->overwrite, disks), + sq->state, sq->sh); + } sq->state = (1 << STRIPE_QUEUE_HANDLE); sq->sector = sector; ------------------------------------------------------------------------------- raid5-fix-get_active_queue-bug.patch ------------------------------------------------------------------------------- raid5: fix get_active_queue bug in retry_aligned_read From: Dan Williams <dan.j.williams@xxxxxxxxx> Check for a potential null return from get_active_queue Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- drivers/md/raid5.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7164011..5fec942 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4459,7 +4459,8 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) continue; sq = get_active_queue(conf, sector, disks, pd_idx, 1); - sh = get_active_stripe(sq, disks, 1); + if (sq) + sh = get_active_stripe(sq, disks, 1); if (!(sq && sh)) { /* failed to get a queue/stripe - must wait */ raid_bio->bi_hw_segments = scnt;