On Mon, May 29, 2023 at 6:14 AM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > If bitmap is enabled, the bitmap must be updated before submitting write io; this > is why the unplug callback must move these io to 'conf->pending_io_list' if > 'current->bio_list' is not empty, which causes performance > degradation. > > A new helper md_bitmap_unplug_async() is introduced to submit bitmap io > in a kworker, so that submitting bitmap io in raid10_unplug() doesn't require > that 'current->bio_list' is empty. > > This patch prepares to limit the number of plugged bios. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > --- > drivers/md/md-bitmap.c | 29 +++++++++++++++++++++++++++++ > drivers/md/md-bitmap.h | 1 + > drivers/md/md.c | 9 +++++++++ > drivers/md/md.h | 1 + > 4 files changed, 40 insertions(+) > > diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c > index 3ee590cf12a7..25cd72b317f1 100644 > --- a/drivers/md/md-bitmap.c > +++ b/drivers/md/md-bitmap.c > @@ -1054,6 +1054,35 @@ void md_bitmap_unplug(struct bitmap *bitmap) > } > EXPORT_SYMBOL(md_bitmap_unplug); > > +struct bitmap_unplug_work { > + struct work_struct work; > + struct bitmap *bitmap; > + struct completion *done; > +}; > + > +static void md_bitmap_unplug_fn(struct work_struct *work) > +{ > + struct bitmap_unplug_work *unplug_work = > + container_of(work, struct bitmap_unplug_work, work); > + > + md_bitmap_unplug(unplug_work->bitmap); > + complete(unplug_work->done); > +} > + > +void md_bitmap_unplug_async(struct bitmap *bitmap) > +{ > + DECLARE_COMPLETION_ONSTACK(done); > + struct bitmap_unplug_work unplug_work; > + > + INIT_WORK(&unplug_work.work, md_bitmap_unplug_fn); > + unplug_work.bitmap = bitmap; > + unplug_work.done = &done; > + > + queue_work(md_bitmap_wq, &unplug_work.work); > + wait_for_completion(&done); > +} > +EXPORT_SYMBOL(md_bitmap_unplug_async); I updated the patch with: diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 
25cd72b317f1..1ff712889a3b 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1074,7 +1074,7 @@ void md_bitmap_unplug_async(struct bitmap *bitmap) DECLARE_COMPLETION_ONSTACK(done); struct bitmap_unplug_work unplug_work; - INIT_WORK(&unplug_work.work, md_bitmap_unplug_fn); + INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn); unplug_work.bitmap = bitmap; unplug_work.done = &done; Otherwise, we will get the following warning with mdadm test 05r1-add-internalbitmap. Thanks, Song [ 135.072374] ODEBUG: object ffffc90005adf500 is on stack ffffc90005ad8000, but NOT annotated. [ 135.074994] ------------[ cut here ]------------ [ 135.075770] WARNING: CPU: 1 PID: 1115 at lib/debugobjects.c:548 lookup_object_or_alloc+0x294/0x560 [ 135.077085] Modules linked in: [ 135.077571] CPU: 1 PID: 1115 Comm: mkfs.ext3 Not tainted 6.4.0-rc2+ #737 [ 135.078542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 [ 135.080398] RIP: 0010:lookup_object_or_alloc+0x294/0x560 [ 135.081165] Code: 2d b1 24 15 09 65 48 8b 2c 25 80 9e 1f 00 48 8d 7d 20 e8 7f 19 a9 ff 48 8b 55 20 48 89 de 48 c7 c7 00 f2 05 83 e8 5c 97 70 ff <0f> 0b 48 83 c4 58 4c 89 f8 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc [ 135.083830] RSP: 0018:ffffc90005adf398 EFLAGS: 00010082 [ 135.084568] RAX: 0000000000000050 RBX: ffffc90005adf500 RCX: 0000000000000000 [ 135.085582] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff8a7dea40 [ 135.086687] RBP: 0000000000000001 R08: fffff52000b5be3d R09: fffff52000b5be3d [ 135.087684] R10: ffffc90005adf1e7 R11: fffff52000b5be3c R12: ffff888103ee0040 [ 135.088725] R13: ffff888103ee0060 R14: ffff888df71f7c40 R15: ffff88815eb90060 [ 135.089780] FS: 00007fa1f1c85780(0000) GS:ffff888df7000000(0000) knlGS:0000000000000000 [ 135.090920] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 135.091751] CR2: 000055add4915f44 CR3: 000000011227a002 CR4: 0000000000370ee0 [ 135.092775] DR0: 0000000000000000 DR1: 
0000000000000000 DR2: 0000000000000000 [ 135.093812] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 135.094838] Call Trace: [ 135.095178] <TASK> [ 135.095528] ? _raw_spin_lock_irqsave+0x6d/0x90 [ 135.096197] __debug_object_init+0x63/0x130 [ 135.096853] md_bitmap_unplug_async+0x9c/0x180 [ 135.097512] ? __pfx_md_bitmap_unplug_async+0x10/0x10 [ 135.098267] ? bio_associate_blkg_from_css+0x446/0xb10 [ 135.099043] ? update_io_ticks+0xad/0x180 [ 135.099636] ? __pfx_update_io_ticks+0x10/0x10 [ 135.100355] flush_bio_list+0x60/0x1c0 [ 135.100922] raid1_unplug+0x64/0x120 [ 135.101460] raid1_make_request+0xfd7/0x1e10 [ 135.102125] ? __pfx_raid1_make_request+0x10/0x10 [ 135.102806] ? lock_release+0x27a/0x690 [ 135.103368] ? __pfx_lock_release+0x10/0x10 [ 135.103963] ? md_handle_request+0x2b3/0x7c0 [ 135.104570] ? __pfx_rcu_read_lock_held+0x10/0x10 [ 135.105232] ? md_handle_request+0x2b3/0x7c0 [ 135.105886] md_handle_request+0x365/0x7c0 [ 135.106492] ? __pfx_md_handle_request+0x10/0x10 [ 135.107177] __submit_bio+0x1dc/0x5d0 [ 135.107770] submit_bio_noacct_nocheck+0x2c2/0x5b0 [ 135.108457] ? lock_is_held_type+0xd8/0x130 [ 135.109076] ? __pfx_submit_bio_noacct_nocheck+0x10/0x10 [ 135.109872] ? submit_bio_noacct+0x283/0x750 [ 135.110533] __block_write_full_page+0x45f/0xbb0 [ 135.111200] ? __pfx_blkdev_get_block+0x10/0x10 [ 135.111901] ? __pfx_end_buffer_async_write+0x10/0x10 [ 135.112741] writepage_cb+0x3e/0xc0 [ 135.113285] write_cache_pages+0x2d8/0x730 [ 135.113906] ? __pfx_writepage_cb+0x10/0x10 [ 135.114531] ? __pfx_write_cache_pages+0x10/0x10 [ 135.115196] ? lock_release+0x27a/0x690 [ 135.115819] do_writepages+0x190/0x310 [ 135.116370] ? __pfx_do_writepages+0x10/0x10 [ 135.116995] ? preempt_count_sub+0x14/0xc0 [ 135.117596] ? _raw_spin_unlock+0x29/0x50 [ 135.118171] ? wbc_attach_and_unlock_inode+0x216/0x410 [ 135.118934] filemap_fdatawrite_wbc+0x93/0xc0 [ 135.119574] __filemap_fdatawrite_range+0x99/0xd0 [ 135.120246] ? 
__pfx___filemap_fdatawrite_range+0x10/0x10 [ 135.121055] ? ktime_get_coarse_real_ts64+0xec/0x100 [ 135.121789] file_write_and_wait_range+0x67/0xd0 [ 135.122578] blkdev_fsync+0x35/0x60 [ 135.123095] do_fsync+0x38/0x70 [ 135.123587] ? syscall_trace_enter.isra.15+0x15c/0x1f0 [ 135.124318] __x64_sys_fsync+0x1d/0x30 [ 135.124924] do_syscall_64+0x3a/0x90 [ 135.125481] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 135.126197] RIP: 0033:0x7fa1efef4478 [ 135.126765] Code: 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 8d 05 25 01 2d 00 8b 00 85 c0 75 17 b8 4a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 40 c3 0f 1f 80 00 00 00 00 53 89 fb 48 83 ec [ 135.129424] RSP: 002b:00007fffb2176318 EFLAGS: 00000246 ORIG_RAX: 000000000000004a [ 135.130498] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa1efef4478 [ 135.131493] RDX: 0000000000000400 RSI: 0000559e026969e0 RDI: 0000000000000003 [ 135.132491] RBP: 0000559e026950d0 R08: 0000559e026969e0 R09: 00007fa1eff80620 [ 135.133496] R10: 0000000000800800 R11: 0000000000000246 R12: 00007fffb2176390 [ 135.134491] R13: 00007fffb2176398 R14: 0000559e02698e70 R15: 0000559e026944f0 [ 135.135515] </TASK> [ 135.135843] irq event stamp: 141152 [ 135.136349] hardirqs last enabled at (141151): [<ffffffff827778e0>] _raw_spin_unlock_irqrestore+0x30/0x60 [ 135.137721] hardirqs last disabled at (141152): [<ffffffff827775cd>] _raw_spin_lock_irqsave+0x6d/0x90 [ 135.139031] softirqs last enabled at (139890): [<ffffffff827793fb>] __do_softirq+0x3eb/0x531 [ 135.140249] softirqs last disabled at (139885): [<ffffffff810d8f45>] irq_exit_rcu+0x115/0x160 [ 135.141503] ---[ end trace 0000000000000000 ]--- > + > static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); > /* * bitmap_init_from_disk -- called at bitmap_create time to initialize > * the in-memory bitmap from the on-disk bitmap -- also, sets up the > diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h > index 3a4750952b3a..8a3788c9bfef 
100644 > --- a/drivers/md/md-bitmap.h > +++ b/drivers/md/md-bitmap.h > @@ -264,6 +264,7 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev, > sector_t new_lo, sector_t new_hi); > > void md_bitmap_unplug(struct bitmap *bitmap); > +void md_bitmap_unplug_async(struct bitmap *bitmap); > void md_bitmap_daemon_work(struct mddev *mddev); > > int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, > diff --git a/drivers/md/md.c b/drivers/md/md.c > index e592f37a1071..a5a7af2f4e59 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -83,6 +83,7 @@ static struct module *md_cluster_mod; > static DECLARE_WAIT_QUEUE_HEAD(resync_wait); > static struct workqueue_struct *md_wq; > static struct workqueue_struct *md_misc_wq; > +struct workqueue_struct *md_bitmap_wq; > > static int remove_and_add_spares(struct mddev *mddev, > struct md_rdev *this); > @@ -9636,6 +9637,11 @@ static int __init md_init(void) > if (!md_misc_wq) > goto err_misc_wq; > > + md_bitmap_wq = alloc_workqueue("md_bitmap", WQ_MEM_RECLAIM | WQ_UNBOUND, > + 0); > + if (!md_bitmap_wq) > + goto err_bitmap_wq; > + > ret = __register_blkdev(MD_MAJOR, "md", md_probe); > if (ret < 0) > goto err_md; > @@ -9654,6 +9660,8 @@ static int __init md_init(void) > err_mdp: > unregister_blkdev(MD_MAJOR, "md"); > err_md: > + destroy_workqueue(md_bitmap_wq); > +err_bitmap_wq: > destroy_workqueue(md_misc_wq); > err_misc_wq: > destroy_workqueue(md_wq); > @@ -9950,6 +9958,7 @@ static __exit void md_exit(void) > spin_unlock(&all_mddevs_lock); > > destroy_workqueue(md_misc_wq); > + destroy_workqueue(md_bitmap_wq); > destroy_workqueue(md_wq); > } > > diff --git a/drivers/md/md.h b/drivers/md/md.h > index a50122165fa1..bfd2306bc750 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -852,6 +852,7 @@ struct mdu_array_info_s; > struct mdu_disk_info_s; > > extern int mdp_major; > +extern struct workqueue_struct *md_bitmap_wq; > void md_autostart_arrays(int part); > int md_set_array_info(struct mddev *mddev, struct 
mdu_array_info_s *info); > int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); > -- > 2.39.2 >