Hi Kuai There is a limitation of the memory in your test. But for most situations, customers should not set this. Can this change introduce a performance regression against other situations? Best Regards Xiao On Wed, Apr 26, 2023 at 4:24 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > bio can be added to plug infinitely, and following writeback test can > trigger huge amount of plugged bio: > > Test script: > modprobe brd rd_nr=4 rd_size=10485760 > mdadm -CR /dev/md0 -l10 -n4 /dev/ram[0123] --assume-clean > echo 0 > /proc/sys/vm/dirty_background_ratio > echo 60 > /proc/sys/vm/dirty_ratio > fio -filename=/dev/md0 -ioengine=libaio -rw=write -bs=4k -numjobs=1 -iodepth=128 -name=test > > Test result: > Monitor /sys/block/md0/inflight will found that inflight keep increasing > until fio finish writing, after running for about 2 minutes: > > [root@fedora ~]# cat /sys/block/md0/inflight > 0 4474191 > > Fix the problem by limiting the number of plugged bio based on the number > of copies for original bio. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > --- > drivers/md/raid1-10.c | 9 ++++++++- > drivers/md/raid1.c | 2 +- > drivers/md/raid10.c | 2 +- > 3 files changed, 10 insertions(+), 3 deletions(-) > > diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c > index 98d678b7df3f..35fb80aa37aa 100644 > --- a/drivers/md/raid1-10.c > +++ b/drivers/md/raid1-10.c > @@ -21,6 +21,7 @@ > #define IO_MADE_GOOD ((struct bio *)2) > > #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) > +#define MAX_PLUG_BIO 32 > > /* for managing resync I/O pages */ > struct resync_pages { > @@ -31,6 +32,7 @@ struct resync_pages { > struct raid1_plug_cb { > struct blk_plug_cb cb; > struct bio_list pending; > + unsigned int count; > }; > > static void rbio_pool_free(void *rbio, void *data) > @@ -127,7 +129,7 @@ static inline void md_submit_write(struct bio *bio) > } > > static inline bool md_add_bio_to_plug(struct mddev *mddev, struct bio *bio, > - blk_plug_cb_fn unplug) > + blk_plug_cb_fn unplug, int copies) > { > struct raid1_plug_cb *plug = NULL; > struct blk_plug_cb *cb; > @@ -147,6 +149,11 @@ static inline bool md_add_bio_to_plug(struct mddev *mddev, struct bio *bio, > > plug = container_of(cb, struct raid1_plug_cb, cb); > bio_list_add(&plug->pending, bio); > + if (++plug->count / MAX_PLUG_BIO >= copies) { > + list_del(&cb->list); > + cb->callback(cb, false); > + } > + > > return true; > } > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index 639e09cecf01..c6066408a913 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -1562,7 +1562,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, > r1_bio->sector); > /* flush_pending_writes() needs access to the rdev so...*/ > mbio->bi_bdev = (void *)rdev; > - if (!md_add_bio_to_plug(mddev, mbio, raid1_unplug)) { > + if (!md_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) { > spin_lock_irqsave(&conf->device_lock, flags); > bio_list_add(&conf->pending_bio_list, mbio); > spin_unlock_irqrestore(&conf->device_lock, flags); > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index bd9e655ca408..7135cfaf75db 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -1306,7 +1306,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, > > atomic_inc(&r10_bio->remaining); > > - if (!md_add_bio_to_plug(mddev, mbio, raid10_unplug)) { > + if (!md_add_bio_to_plug(mddev, mbio, raid10_unplug, conf->copies)) { > spin_lock_irqsave(&conf->device_lock, flags); > bio_list_add(&conf->pending_bio_list, mbio); > spin_unlock_irqrestore(&conf->device_lock, flags); > -- > 2.39.2 >