Re: PROBLEM: repeatable lockup on RAID-6 with LUKS dm-crypt on NVMe devices when rsyncing many files

Hi,

On 2024/10/26 13:37, Christian Theune wrote:

On 25. Oct 2024, at 16:02, Christian Theune <ct@xxxxxxxxxxxxxxx> wrote:

Yeah, this was more directed towards the question of whether Yu needs me to run the patch he posted earlier.

So, the current status: previously this crashed within 2-3 hours. Both machines are now running with the bitmap turned off, as described above, and have been syncing data for about 7 hours. This seems to indicate that the bitmap is involved here.

Update: both machines have been able to finish their multi-TiB rsync job that previously caused reliable lockups. So: the bitmap code seems to be the culprit here …

Christian


Then, can you enable the bitmap and test the following debug patch:
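
The patch adds an atomic bitmap_counts field to struct stripe_head,
increments it on every bitmap_ops->startwrite() and decrements it on every
bitmap_ops->endwrite(), printing the stripe pointer, sector and counter each
time, so an unbalanced start/end pair on some stripe should show up directly
in the log.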

Thanks,
Kuai

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 58f71c3e1368..b2a75a904209 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2369,6 +2369,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
                atomic_set(&sh->count, 1);
                sh->raid_conf = conf;
                sh->log_start = MaxSector;
+               atomic_set(&sh->bitmap_counts, 0);

                if (raid5_has_ppl(conf)) {
                        sh->ppl_page = alloc_page(gfp);
@@ -3565,6 +3566,7 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
                spin_unlock_irq(&sh->stripe_lock);
                conf->mddev->bitmap_ops->startwrite(conf->mddev, sh->sector,
                                        RAID5_STRIPE_SECTORS(conf), false);
+               printk("%s: %s: start %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_inc_return(&sh->bitmap_counts));
                spin_lock_irq(&sh->stripe_lock);
                clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
                if (!sh->batch_head) {
@@ -3662,10 +3664,12 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                        bio_io_error(bi);
                        bi = nextbi;
                }
-               if (bitmap_end)
+               if (bitmap_end) {
                        conf->mddev->bitmap_ops->endwrite(conf->mddev,
                                        sh->sector, RAID5_STRIPE_SECTORS(conf),
                                        false, false);
+                       printk("%s: %s: end %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_dec_return(&sh->bitmap_counts));
+               }
                bitmap_end = 0;
                /* and fail all 'written' */
                bi = sh->dev[i].written;
@@ -3709,10 +3713,12 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                bi = nextbi;
                        }
                }
-               if (bitmap_end)
+               if (bitmap_end) {
                        conf->mddev->bitmap_ops->endwrite(conf->mddev,
                                        sh->sector, RAID5_STRIPE_SECTORS(conf),
                                        false, false);
+                       printk("%s: %s: end %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_dec_return(&sh->bitmap_counts));
+               }
                /* If we were in the middle of a write the parity block might
                 * still be locked - so just clear all R5_LOCKED flags
                 */
@@ -4065,6 +4071,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                        sh->sector, RAID5_STRIPE_SECTORS(conf),
                                        !test_bit(STRIPE_DEGRADED, &sh->state),
                                        false);
+                               printk("%s: %s: end %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_dec_return(&sh->bitmap_counts));
                                if (head_sh->batch_head) {
                                        sh = list_first_entry(&sh->batch_list,
                                                              struct stripe_head,
@@ -5785,9 +5792,11 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                spin_unlock_irq(&sh->stripe_lock);
                if (conf->mddev->bitmap) {
                        for (d = 0; d < conf->raid_disks - conf->max_degraded;
-                            d++)
+                            d++) {
                                mddev->bitmap_ops->startwrite(mddev, sh->sector,
                                        RAID5_STRIPE_SECTORS(conf), false);
+                               printk("%s: %s: start %px(%llu+%lu) %u\n", __func__, mdname(conf->mddev), sh, sh->sector, RAID5_STRIPE_SECTORS(conf), atomic_inc_return(&sh->bitmap_counts));
+                       }
                        sh->bm_seq = conf->seq_flush + 1;
                        set_bit(STRIPE_BIT_DELAY, &sh->state);
                }
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 896ecfc4afa6..12024249245e 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -255,6 +255,7 @@ struct stripe_head {
        int     nr_pages;       /* page array size */
        int     stripes_per_page;
 #endif
+       atomic_t bitmap_counts;
        struct r5dev {
                /* rreq and rvec are used for the replacement device when
                 * writing data to both devices.
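
To spell out the invariant this probes: every bitmap_ops->startwrite() on a
stripe should be balanced by exactly one bitmap_ops->endwrite(), so the
counter printed above should always come back to zero; a stripe whose counter
keeps growing (or goes negative) in dmesg points at the mismatched call. A
rough userspace sketch of the same counting idea (illustration only, not
kernel code; the names merely mirror the patch):

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for struct stripe_head, holding only the debug counter. */
struct stripe {
        atomic_int bitmap_counts;
};

static void startwrite(struct stripe *sh)
{
        /* like atomic_inc_return(&sh->bitmap_counts) in the patch */
        printf("start %d\n", atomic_fetch_add(&sh->bitmap_counts, 1) + 1);
}

static void endwrite(struct stripe *sh)
{
        /* like atomic_dec_return(&sh->bitmap_counts) in the patch */
        printf("end   %d\n", atomic_fetch_sub(&sh->bitmap_counts, 1) - 1);
}

int main(void)
{
        struct stripe sh = { .bitmap_counts = 0 };

        startwrite(&sh);        /* prints "start 1" */
        endwrite(&sh);          /* prints "end   0" -- balanced */

        startwrite(&sh);        /* prints "start 1" */
        /*
         * No matching endwrite(): the counter is stuck at 1, which is
         * exactly the kind of imbalance the printk trace would reveal.
         */
        return 0;
}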




