On Thu, Nov 7, 2024 at 9:22 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > Hi, > > 在 2024/11/05 23:34, Haris Iqbal 写道: > > On Tue, Nov 5, 2024 at 3:04 PM Dragan Milivojević <galileo@xxxxxxxxxxx> wrote: > >> > >> On Tue, 5 Nov 2024 at 10:58, Haris Iqbal <haris.iqbal@xxxxxxxxx> wrote: > >>> > >>> Hi, > >>> > >>> I am running fio over a RDMA block device. The server side of this > >>> mapping is an md-raid0 device, created over 3 md-raid5 devices. > >>> The md-raid5 devices each are created over 8 block devices. Below is > >>> how the raid configuration looks (md400, md300, md301 and md302 are > >>> relevant for this discussion here). > >> > >> Try disabling the bitmap as a quick "fix" and see if that helps. > > > > Yes. Disabling bitmap does seem to prevent the hang completely. I ran > > fio for 10 minutes and no hang. > > Triggered the hang in 10 seconds after reverting back to internal bitmap. > > > > Can you give the following patch a test? It's based on v6.11. > > Thanks, > Kuai > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index d3a837506a36..5e1a82b79e41 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -8753,6 +8753,30 @@ void md_submit_discard_bio(struct mddev *mddev, > struct md_rdev *rdev, > } > EXPORT_SYMBOL_GPL(md_submit_discard_bio); > > +static bool is_raid456(struct mddev *mddev) > +{ > + return mddev->pers->level == 4 || mddev->pers->level == 5 || > + mddev->pers->level == 6; > +} > + > +static void bitmap_startwrite(struct mddev *mddev, struct bio *bio) > +{ > + if (!is_raid456(mddev) || !mddev->bitmap) > + return; > + > + md_bitmap_startwrite(mddev->bitmap, bio_offset(bio), > bio_sectors(bio), > + 0); > +} > + > +static void bitmap_endwrite(struct mddev *mddev, struct bio *bio, > sector_t sectors) > +{ > + if (!is_raid456(mddev) || !mddev->bitmap) > + return; > + > + md_bitmap_endwrite(mddev->bitmap, bio_offset(bio), sectors,o > + bio->bi_status == BLK_STS_OK, 0); > +} > + > static void md_end_clone_io(struct bio *bio) > { > struct md_io_clone *md_io_clone = bio->bi_private; > @@ -8765,6 +8789,7 @@ static void md_end_clone_io(struct bio *bio) > if (md_io_clone->start_time) > bio_end_io_acct(orig_bio, md_io_clone->start_time); > > + bitmap_endwrite(mddev, orig_bio, md_io_clone->sectors); > bio_put(bio); > bio_endio(orig_bio); > percpu_ref_put(&mddev->active_io); > @@ -8778,6 +8803,7 @@ static void md_clone_bio(struct mddev *mddev, > struct bio **bio) > bio_alloc_clone(bdev, *bio, GFP_NOIO, > &mddev->io_clone_set); > > md_io_clone = container_of(clone, struct md_io_clone, bio_clone); > + md_io_clone->sectors = bio_sectors(*bio); > md_io_clone->orig_bio = *bio; > md_io_clone->mddev = mddev; > if (blk_queue_io_stat(bdev->bd_disk->queue)) > @@ -8790,6 +8816,7 @@ static void md_clone_bio(struct mddev *mddev, > struct bio **bio) > > void md_account_bio(struct mddev *mddev, struct bio **bio) > { > + bitmap_startwrite(mddev, *bio); > percpu_ref_get(&mddev->active_io); > md_clone_bio(mddev, bio); > } > @@ -8807,6 +8834,8 @@ void md_free_cloned_bio(struct bio *bio) > if (md_io_clone->start_time) > bio_end_io_acct(orig_bio, md_io_clone->start_time); > > + bitmap_endwrite(mddev, orig_bio, md_io_clone->sectors); > + > bio_put(bio); > percpu_ref_put(&mddev->active_io); > } > diff --git a/drivers/md/md.h b/drivers/md/md.h > index a0d6827dced9..0c2794230e0a 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -837,6 +837,7 @@ struct md_io_clone { > struct mddev *mddev; > struct bio *orig_bio; > unsigned long start_time; > + sector_t sectors; > struct bio bio_clone; > }; > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index c14cf2410365..4f009e32f68a 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -3561,12 +3561,6 @@ static void __add_stripe_bio(struct stripe_head > *sh, struct bio *bi, > * is added to a batch, STRIPE_BIT_DELAY cannot be changed > * any more. > */ > - set_bit(STRIPE_BITMAP_PENDING, &sh->state); > - spin_unlock_irq(&sh->stripe_lock); > - md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, > - RAID5_STRIPE_SECTORS(conf), 0); > - spin_lock_irq(&sh->stripe_lock); > - clear_bit(STRIPE_BITMAP_PENDING, &sh->state); > if (!sh->batch_head) { > sh->bm_seq = conf->seq_flush+1; > set_bit(STRIPE_BIT_DELAY, &sh->state); > @@ -3621,7 +3615,6 @@ handle_failed_stripe(struct r5conf *conf, struct > stripe_head *sh, > BUG_ON(sh->batch_head); > for (i = disks; i--; ) { > struct bio *bi; > - int bitmap_end = 0; > > if (test_bit(R5_ReadError, &sh->dev[i].flags)) { > struct md_rdev *rdev = conf->disks[i].rdev; > @@ -3646,8 +3639,6 @@ handle_failed_stripe(struct r5conf *conf, struct > stripe_head *sh, > sh->dev[i].towrite = NULL; > sh->overwrite_disks = 0; > spin_unlock_irq(&sh->stripe_lock); > - if (bi) > - bitmap_end = 1; > > log_stripe_write_finished(sh); > @@ -3662,10 +3653,6 @@ handle_failed_stripe(struct r5conf *conf, struct > stripe_head *sh, > bio_io_error(bi); > bi = nextbi; > } > - if (bitmap_end) > - md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, > - RAID5_STRIPE_SECTORS(conf), > 0, 0); > - bitmap_end = 0; > /* and fail all 'written' */ > bi = sh->dev[i].written; > sh->dev[i].written = NULL; > @@ -3674,7 +3661,6 @@ handle_failed_stripe(struct r5conf *conf, struct > stripe_head *sh, > sh->dev[i].page = sh->dev[i].orig_page; > } > > - if (bi) bitmap_end = 1; > while (bi && bi->bi_iter.bi_sector < > sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { > struct bio *bi2 = r5_next_bio(conf, bi, > sh->dev[i].sector); > @@ -3708,9 +3694,6 @@ handle_failed_stripe(struct r5conf *conf, struct > stripe_head *sh, > bi = nextbi; > } > } > - if (bitmap_end) > - md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, > - RAID5_STRIPE_SECTORS(conf), > 0, 0); > /* If we were in the middle of a write the parity block > might > * still be locked - so just clear all R5_LOCKED flags > */ > @@ -4059,10 +4042,6 @@ static void handle_stripe_clean_event(struct > r5conf *conf, > bio_endio(wbi); > wbi = wbi2; > } > - md_bitmap_endwrite(conf->mddev->bitmap, > sh->sector, > - RAID5_STRIPE_SECTORS(conf), > - !test_bit(STRIPE_DEGRADED, &sh->state), > - 0); > if (head_sh->batch_head) { > sh = > list_first_entry(&sh->batch_list, > struct > stripe_head, > @@ -5788,13 +5767,6 @@ static void make_discard_request(struct mddev > *mddev, struct bio *bi) > } > spin_unlock_irq(&sh->stripe_lock); > if (conf->mddev->bitmap) { > - for (d = 0; > - d < conf->raid_disks - conf->max_degraded; > - d++) > - md_bitmap_startwrite(mddev->bitmap, > - sh->sector, > - RAID5_STRIPE_SECTORS(conf), > - 0); > sh->bm_seq = conf->seq_flush + 1; > set_bit(STRIPE_BIT_DELAY, &sh->state); > } > > . > > > > Hi all I just replied against one raid5 stuck problem. This one looks like a similar one. We have a customer who reports one similar problem. David has a patch that can work. I place it in the attachment. Can you have a try also? The patch can be applied cleanly on 6.11-rc6 Regards Xiao
Attachment:
md_raid5_one_bitmap_claim_per_stripe_head.patch
Description: Binary data