On Thu, Feb 7, 2019 at 11:26 AM Nate Dailey <nate.dailey@xxxxxxxxxxx> wrote: > > sync_request_write no longer submits writes to a Faulty device. This has > the unfortunate side effect that bitmap bits can be incorrectly cleared > if a recovery is interrupted (previously, end_sync_write would have > prevented this). This means the next recovery may not copy everything > it should, potentially corrupting data. Hi Nate, Which commit introduces this bug? Could you please add a "Fixes" line pointing to that commit? Thanks, Song > > Add a function for doing the proper md_bitmap_end_sync, called from > end_sync_write and the Faulty case in sync_request_write. > > Signed-off-by: Nate Dailey <nate.dailey@xxxxxxxxxxx> > --- > drivers/md/raid1.c | 28 ++++++++++++++++++---------- > 1 file changed, 18 insertions(+), 10 deletions(-) > > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index 1d54109071cc..fa47249fa3e4 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -1863,6 +1863,20 @@ static void end_sync_read(struct bio *bio) > reschedule_retry(r1_bio); > } > > +static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) > +{ > + sector_t sync_blocks = 0; > + sector_t s = r1_bio->sector; > + long sectors_to_go = r1_bio->sectors; > + > + /* make sure these bits don't get cleared. */ > + do { > + md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); > + s += sync_blocks; > + sectors_to_go -= sync_blocks; > + } while (sectors_to_go > 0); > +} > + > static void end_sync_write(struct bio *bio) > { > int uptodate = !bio->bi_status; > @@ -1874,15 +1888,7 @@ static void end_sync_write(struct bio *bio) > struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev; > > if (!uptodate) { > - sector_t sync_blocks = 0; > - sector_t s = r1_bio->sector; > - long sectors_to_go = r1_bio->sectors; > - /* make sure these bits doesn't get cleared. */ > - do { > - md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); > - s += sync_blocks; > - sectors_to_go -= sync_blocks; > - } while (sectors_to_go > 0); > + abort_sync_write(mddev, r1_bio); > set_bit(WriteErrorSeen, &rdev->flags); > if (!test_and_set_bit(WantReplacement, &rdev->flags)) > set_bit(MD_RECOVERY_NEEDED, & > @@ -2172,8 +2178,10 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) > (i == r1_bio->read_disk || > !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) > continue; > - if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) > + if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { > + abort_sync_write(mddev, r1_bio); > continue; > + } > > bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); > if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) > -- > 2.19.1 >