Re: New RAID causing system lockups

Mike Hartman <mike@xxxxxxxxxxxxxxxxxxxx> · Mon, 13 Sep 2010 23:35:50 -0400

>>>
>>> > Hi Mike,
>>> >  thanks for the updates.
>>> >
>>> > I'm not entirely clear what is happening (in fact, due to a cold that I am
>>> > still fighting off, nothing is entirely clear at the moment), but it looks
>>> > very likely that the problem is due to an interplay between barrier handling,
>>> > and the multi-level structure of your array (a raid0 being a member of a
>>> > raid5).
>>> >
>>> > When a barrier request is processed, both arrays will schedule 'work' to be
>>> > done by the 'event' thread and I'm guess that you can get into a situation
>>> > where one work time is wait for the other, but the other is behind the one on
>>> > the single queue (I wonder if that make sense...)
>>> >
>>> > Anyway, this patch might make a difference,  It reduced the number of work
>>> > items schedule in a way that could conceivably fix the problem.
>>> >
>>> > If you can test this, please report the results.  I cannot easily reproduce
>>> > the problem so there is limited testing that I can do.
>>> >
>>> > Thanks,
>>> > NeilBrown
>>> >
>>> >
>>> > diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> > index f20d13e..7f2785c 100644
>>> > --- a/drivers/md/md.c
>>> > +++ b/drivers/md/md.c
>>> > @@ -294,6 +294,23 @@ EXPORT_SYMBOL(mddev_congested);
>>> >
>>> >  #define POST_REQUEST_BARRIER ((void*)1)
>>> >
>>> > +static void md_barrier_done(mddev_t *mddev)
>>> > +{
>>> > +       struct bio *bio = mddev->barrier;
>>> > +
>>> > +       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
>>> > +               bio_endio(bio, -EOPNOTSUPP);
>>> > +       else if (bio->bi_size == 0)
>>> > +               bio_endio(bio, 0);
>>> > +       else {
>>> > +               /* other options need to be handled from process context */
>>> > +               schedule_work(&mddev->barrier_work);
>>> > +               return;
>>> > +       }
>>> > +       mddev->barrier = NULL;
>>> > +       wake_up(&mddev->sb_wait);
>>> > +}
>>> > +
>>> >  static void md_end_barrier(struct bio *bio, int err)
>>> >  {
>>> >        mdk_rdev_t *rdev = bio->bi_private;
>>> > @@ -310,7 +327,7 @@ static void md_end_barrier(struct bio *bio, int err)
>>> >                        wake_up(&mddev->sb_wait);
>>> >                } else
>>> >                        /* The pre-request barrier has finished */
>>> > -                       schedule_work(&mddev->barrier_work);
>>> > +                       md_barrier_done(mddev);
>>> >        }
>>> >        bio_put(bio);
>>> >  }
>>> > @@ -350,18 +367,12 @@ static void md_submit_barrier(struct work_struct *ws)
>>> >
>>> >        atomic_set(&mddev->flush_pending, 1);
>>> >
>>> > -       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
>>> > -               bio_endio(bio, -EOPNOTSUPP);
>>> > -       else if (bio->bi_size == 0)
>>> > -               /* an empty barrier - all done */
>>> > -               bio_endio(bio, 0);
>>> > -       else {
>>> > -               bio->bi_rw &= ~REQ_HARDBARRIER;
>>> > -               if (mddev->pers->make_request(mddev, bio))
>>> > -                       generic_make_request(bio);
>>> > -               mddev->barrier = POST_REQUEST_BARRIER;
>>> > -               submit_barriers(mddev);
>>> > -       }
>>> > +       bio->bi_rw &= ~REQ_HARDBARRIER;
>>> > +       if (mddev->pers->make_request(mddev, bio))
>>> > +               generic_make_request(bio);
>>> > +       mddev->barrier = POST_REQUEST_BARRIER;
>>> > +       submit_barriers(mddev);
>>> > +
>>> >        if (atomic_dec_and_test(&mddev->flush_pending)) {
>>> >                mddev->barrier = NULL;
>>> >                wake_up(&mddev->sb_wait);
>>> > @@ -383,7 +394,7 @@ void md_barrier_request(mddev_t *mddev, struct bio *bio)
>>> >        submit_barriers(mddev);
>>> >
>>> >        if (atomic_dec_and_test(&mddev->flush_pending))
>>> > -               schedule_work(&mddev->barrier_work);
>>> > +               md_barrier_done(mddev);
>>> >  }
>>> >  EXPORT_SYMBOL(md_barrier_request);
>>> >
>>> >
>>> >
>>>
>>> Neil, thanks for the patch. I experienced the lockup for the 5th time
>>> an hour ago (about 3 hours after the last hard reboot) so I thought it
>>> would be a good time to try your patch. Unfortunately I'm getting an
>>> error:
>>>
>>> patching file drivers/md/md.c
>>> Hunk #1 succeeded at 291 with fuzz 1 (offset -3 lines).
>>> Hunk #2 FAILED at 324.
>>> Hunk #3 FAILED at 364.
>>> Hunk #4 FAILED at 391.
>>> 3 out of 4 hunks FAILED -- saving rejects to file drivers/md/md.c.rej
>>
>> That is odd.
>> I took the md.c that you posted on the web site, use "patch" to apply my
>> patch to it, and only Hunk #3 failed.
>>
>> I used 'wiggle' to apply the patch and it applied perfectly, properly
>> replacing (1<<BIO_RW_BARRIER) with REQ_HARDBARRIER (or the other way around).
>>
>> Try this version.  You will need to be in drivers/md/, or use
>>
>>  patch drivers/md/md.c < this-patch
>>
>>
>> NeilBrown
>>
>> --- md.c.orig   2010-09-14 11:29:15.000000000 +1000
>> +++ md.c        2010-09-14 11:29:50.000000000 +1000
>> @@ -291,6 +291,23 @@
>>
>>  #define POST_REQUEST_BARRIER ((void*)1)
>>
>> +static void md_barrier_done(mddev_t *mddev)
>> +{
>> +       struct bio *bio = mddev->barrier;
>> +
>> +       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
>> +               bio_endio(bio, -EOPNOTSUPP);
>> +       else if (bio->bi_size == 0)
>> +               bio_endio(bio, 0);
>> +       else {
>> +               /* other options need to be handled from process context */
>> +               schedule_work(&mddev->barrier_work);
>> +               return;
>> +       }
>> +       mddev->barrier = NULL;
>> +       wake_up(&mddev->sb_wait);
>> +}
>> +
>>  static void md_end_barrier(struct bio *bio, int err)
>>  {
>>        mdk_rdev_t *rdev = bio->bi_private;
>> @@ -307,7 +324,7 @@
>>                        wake_up(&mddev->sb_wait);
>>                } else
>>                        /* The pre-request barrier has finished */
>> -                       schedule_work(&mddev->barrier_work);
>> +                       md_barrier_done(mddev);
>>        }
>>        bio_put(bio);
>>  }
>> @@ -347,18 +364,12 @@
>>
>>        atomic_set(&mddev->flush_pending, 1);
>>
>> -       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
>> -               bio_endio(bio, -EOPNOTSUPP);
>> -       else if (bio->bi_size == 0)
>> -               /* an empty barrier - all done */
>> -               bio_endio(bio, 0);
>> -       else {
>> -               bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
>> -               if (mddev->pers->make_request(mddev, bio))
>> -                       generic_make_request(bio);
>> -               mddev->barrier = POST_REQUEST_BARRIER;
>> -               submit_barriers(mddev);
>> -       }
>> +       bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
>> +       if (mddev->pers->make_request(mddev, bio))
>> +               generic_make_request(bio);
>> +       mddev->barrier = POST_REQUEST_BARRIER;
>> +       submit_barriers(mddev);
>> +
>>        if (atomic_dec_and_test(&mddev->flush_pending)) {
>>                mddev->barrier = NULL;
>>                wake_up(&mddev->sb_wait);
>> @@ -380,7 +391,7 @@
>>        submit_barriers(mddev);
>>
>>        if (atomic_dec_and_test(&mddev->flush_pending))
>> -               schedule_work(&mddev->barrier_work);
>> +               md_barrier_done(mddev);
>>  }
>>  EXPORT_SYMBOL(md_barrier_request);
>>
>>
>
> Sorry about that Neil, it was my fault. I copied your patch out of the
> email and I think it picked up some unintended characters. I tried
> copying it from the mailing list archive website instead and it
> patched in fine. The kernel compiled with no trouble and I'm booted
> into it now. No unexpected side-effects yet. I'll continue with my
> copying and we'll see if it locks up again. Thanks for all your help!
>
> Mike
>

Sorry Neil, locked up again in less than an hour. I've uploaded
dmesg_5.txt in case the trace shows something different/useful in
light of your patch.

Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html