On Mon, Dec 23, 2024 at 3:50 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > Hi, > > 在 2024/12/23 15:31, Xiao Ni 写道: > > On Wed, Dec 18, 2024 at 8:21 PM <yukuai@xxxxxxxxxx> wrote: > >> > >> From: Yu Kuai <yukuai3@xxxxxxxxxx> > >> > >> It is useless, because for the case IO failed for one rdev: > >> > >> - If badblocks is set and rdev is not faulty, there is no need to > >> mark the bit as NEEDED; > > > > > > Hi Kuai > > > > It's better to add some comments before here. Before this patch, it's > > easy to understand. It needs to set bitmap bit when a write request > > fails. Hi Kuai > > This is not accurate, it's doesn't matter if IO fails or succeed, bit > must be set if data is not consistent, either IO is not done yet, or the > array is degaraded. Sorry for the wrong words. I want to say bitmap NEEDED bit is set when a write request fails. After this patch, we can't see the logic directly. So it's a hidden logic. It's better to add more comments here for future maintenance. And I read the codes, R1BIO_Degraded, STRIPE_DEGRADED, R10BIO_Degraded, these three flags are only used to tell bitmap if it needs to set NEEDED bit. After this patch, it looks like these flags are not useful anymore. > > > With this patch, there are some hidden logics here. To me, it's > > not easy to maintain in the future. And in man mdadm, there is no-bbl > > option, so it looks like an array may not have a bad block. And I > > don't know how dmraid maintain badblock. So this patch needs to be > > more careful. > > no-bbl is one of the option of mdadm --update, I think it means remove > current badblock entries, not that disable badblocks. > > In kernel, badblocks is always enabled by default, and IO error will > always try to set badblocks first. For example: > > - badblocks_init() is called from md_rdev_init(), and if > badblocks_init() fails, the array can't be assembled. > - The only thing stop rdev to record badblocks after IO failure is that > rdev is faulty. Yes, thanks for pointing out this. > > Thanks, > Kuai > > > > > Regards > > Xiao > > > >> - If rdev is faulty, then mddev->degraded will be set, and we can use > >> it to mard the bit as NEEDED; > >> > >> Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > >> Signed-off-by: Yu Kuai <yukuai@xxxxxxxxxx> > >> --- > >> drivers/md/md-bitmap.c | 19 ++++++++++--------- > >> drivers/md/md-bitmap.h | 2 +- > >> drivers/md/raid1.c | 3 +-- > >> drivers/md/raid10.c | 3 +-- > >> drivers/md/raid5-cache.c | 3 +-- > >> drivers/md/raid5.c | 9 +++------ > >> 6 files changed, 17 insertions(+), 22 deletions(-) > >> > >> diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c > >> index 84fb4cc67d5e..b40a84b01085 100644 > >> --- a/drivers/md/md-bitmap.c > >> +++ b/drivers/md/md-bitmap.c > >> @@ -1726,7 +1726,7 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset, > >> } > >> > >> static void bitmap_endwrite(struct mddev *mddev, sector_t offset, > >> - unsigned long sectors, bool success) > >> + unsigned long sectors) > >> { > >> struct bitmap *bitmap = mddev->bitmap; > >> > >> @@ -1745,15 +1745,16 @@ static void bitmap_endwrite(struct mddev *mddev, sector_t offset, > >> return; > >> } > >> > >> - if (success && !bitmap->mddev->degraded && > >> - bitmap->events_cleared < bitmap->mddev->events) { > >> - bitmap->events_cleared = bitmap->mddev->events; > >> - bitmap->need_sync = 1; > >> - sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); > >> - } > >> - > >> - if (!success && !NEEDED(*bmc)) > >> + if (!bitmap->mddev->degraded) { > >> + if (bitmap->events_cleared < bitmap->mddev->events) { > >> + bitmap->events_cleared = bitmap->mddev->events; > >> + bitmap->need_sync = 1; > >> + sysfs_notify_dirent_safe( > >> + bitmap->sysfs_can_clear); > >> + } > >> + } else if (!NEEDED(*bmc)) { > >> *bmc |= NEEDED_MASK; > >> + } > >> > >> if (COUNTER(*bmc) == COUNTER_MAX) > >> wake_up(&bitmap->overflow_wait); > >> diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h > >> index e87a1f493d3c..31c93019c76b 100644 > >> --- a/drivers/md/md-bitmap.h > >> +++ b/drivers/md/md-bitmap.h > >> @@ -92,7 +92,7 @@ struct bitmap_operations { > >> int (*startwrite)(struct mddev *mddev, sector_t offset, > >> unsigned long sectors); > >> void (*endwrite)(struct mddev *mddev, sector_t offset, > >> - unsigned long sectors, bool success); > >> + unsigned long sectors); > >> bool (*start_sync)(struct mddev *mddev, sector_t offset, > >> sector_t *blocks, bool degraded); > >> void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks); > >> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > >> index 15ba7a001f30..81dff2cea0db 100644 > >> --- a/drivers/md/raid1.c > >> +++ b/drivers/md/raid1.c > >> @@ -423,8 +423,7 @@ static void close_write(struct r1bio *r1_bio) > >> if (test_bit(R1BIO_BehindIO, &r1_bio->state)) > >> mddev->bitmap_ops->end_behind_write(mddev); > >> /* clear the bitmap if all writes complete successfully */ > >> - mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors, > >> - !test_bit(R1BIO_Degraded, &r1_bio->state)); > >> + mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors); > >> md_write_end(mddev); > >> } > >> > >> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > >> index c3a93b2a26a6..3dc0170125b2 100644 > >> --- a/drivers/md/raid10.c > >> +++ b/drivers/md/raid10.c > >> @@ -429,8 +429,7 @@ static void close_write(struct r10bio *r10_bio) > >> struct mddev *mddev = r10_bio->mddev; > >> > >> /* clear the bitmap if all writes complete successfully */ > >> - mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors, > >> - !test_bit(R10BIO_Degraded, &r10_bio->state)); > >> + mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors); > >> md_write_end(mddev); > >> } > >> > >> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c > >> index 4c7ecdd5c1f3..ba4f9577c737 100644 > >> --- a/drivers/md/raid5-cache.c > >> +++ b/drivers/md/raid5-cache.c > >> @@ -314,8 +314,7 @@ void r5c_handle_cached_data_endio(struct r5conf *conf, > >> set_bit(R5_UPTODATE, &sh->dev[i].flags); > >> r5c_return_dev_pending_writes(conf, &sh->dev[i]); > >> conf->mddev->bitmap_ops->endwrite(conf->mddev, > >> - sh->sector, RAID5_STRIPE_SECTORS(conf), > >> - !test_bit(STRIPE_DEGRADED, &sh->state)); > >> + sh->sector, RAID5_STRIPE_SECTORS(conf)); > >> } > >> } > >> } > >> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > >> index 93cc7e252dd4..6eb2841ce28c 100644 > >> --- a/drivers/md/raid5.c > >> +++ b/drivers/md/raid5.c > >> @@ -3664,8 +3664,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, > >> } > >> if (bitmap_end) > >> conf->mddev->bitmap_ops->endwrite(conf->mddev, > >> - sh->sector, RAID5_STRIPE_SECTORS(conf), > >> - false); > >> + sh->sector, RAID5_STRIPE_SECTORS(conf)); > >> bitmap_end = 0; > >> /* and fail all 'written' */ > >> bi = sh->dev[i].written; > >> @@ -3711,8 +3710,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, > >> } > >> if (bitmap_end) > >> conf->mddev->bitmap_ops->endwrite(conf->mddev, > >> - sh->sector, RAID5_STRIPE_SECTORS(conf), > >> - false); > >> + sh->sector, RAID5_STRIPE_SECTORS(conf)); > >> /* If we were in the middle of a write the parity block might > >> * still be locked - so just clear all R5_LOCKED flags > >> */ > >> @@ -4062,8 +4060,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, > >> wbi = wbi2; > >> } > >> conf->mddev->bitmap_ops->endwrite(conf->mddev, > >> - sh->sector, RAID5_STRIPE_SECTORS(conf), > >> - !test_bit(STRIPE_DEGRADED, &sh->state)); > >> + sh->sector, RAID5_STRIPE_SECTORS(conf)); > >> if (head_sh->batch_head) { > >> sh = list_first_entry(&sh->batch_list, > >> struct stripe_head, > >> -- > >> 2.43.0 > >> > >> > > > > > > . > > >