wake_up() will acquire the waitqueue spinlock even when the waitqueue is
empty. This might involve cache synchronization overhead. Let's only call
wake_up() when the waitqueue is active.

Suggested-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
Signed-off-by: Barry Song <v-songbaohua@xxxxxxxx>
---
 mm/memory.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index fe21bd3beff5..4adb2d0bcc7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4623,7 +4623,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         /* Clear the swap cache pin for direct swapin after PTL unlock */
         if (need_clear_cache) {
                 swapcache_clear(si, entry, nr_pages);
-                wake_up(&swapcache_wq);
+                if (waitqueue_active(&swapcache_wq))
+                        wake_up(&swapcache_wq);
         }
         if (si)
                 put_swap_device(si);
@@ -4641,7 +4642,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         }
         if (need_clear_cache) {
                 swapcache_clear(si, entry, nr_pages);
-                wake_up(&swapcache_wq);
+                if (waitqueue_active(&swapcache_wq))
+                        wake_up(&swapcache_wq);
         }
         if (si)
                 put_swap_device(si);
--
2.39.3 (Apple Git-146)
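A note on why the check helps: waitqueue_active() is a lockless peek at
the queue head, so in the common case (no fault is stalled on this swap
entry) the spin_lock_irqsave()/spin_unlock_irqrestore() pair inside
__wake_up() is skipped entirely. In general the pattern needs care, since
a waiter must add itself to the queue before re-checking its condition,
or the check must be paired with a barrier as wq_has_sleeper() does; here
a missed wakeup is tolerable anyway because the waiter sleeps with a
one-jiffy timeout. Below is a minimal userspace sketch of the idea, not
the patch's code: names such as nr_waiters and wake_up_if_active() are
made up, and a pthread condition variable plus an atomic waiter count
stand in for the kernel waitqueue.

/*
 * Userspace analogue of the waitqueue_active() optimization.
 * Illustrative only; a condvar stands in for the kernel waitqueue.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static atomic_int nr_waiters;   /* analogue of "waitqueue is active" */
static atomic_int done;

static void *waiter(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        atomic_fetch_add(&nr_waiters, 1);       /* register before re-checking */
        while (!atomic_load(&done))
                pthread_cond_wait(&cond, &lock);
        atomic_fetch_sub(&nr_waiters, 1);
        pthread_mutex_unlock(&lock);
        return NULL;
}

static void wake_up_if_active(void)
{
        atomic_store(&done, 1);                 /* publish the condition first */
        /*
         * Lockless peek, like waitqueue_active(): if nobody has
         * registered, skip the lock/signal entirely. Safe because the
         * waiter increments nr_waiters before re-testing the condition
         * and the seq_cst atomics order the two accesses; the kernel
         * patch instead tolerates a rare miss since the waiter sleeps
         * with a one-tick timeout anyway.
         */
        if (atomic_load(&nr_waiters) > 0) {
                pthread_mutex_lock(&lock);
                pthread_cond_broadcast(&cond);
                pthread_mutex_unlock(&lock);
        }
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, waiter, NULL);
        usleep(10000);                          /* let the waiter block */
        wake_up_if_active();
        pthread_join(&t, NULL);
        puts("woken; empty-queue wakeups skip the lock");
        return 0;
}

Build with "cc -pthread"; the wake side takes the mutex only when a
waiter has actually registered, mirroring how the patched path only
touches the waitqueue lock when waitqueue_active() reports sleepers.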
> >
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 2366578015ad..aae0e532d8b6 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -4192,6 +4192,23 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >  }
> >  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> >
> > +/*
> > + * Alleviating the 'thundering herd' phenomenon using a waitqueue hash
> > + * when multiple do_swap_page() operations occur simultaneously.
> > + */
> > +#define SWAPCACHE_WAIT_TABLE_BITS 5
> > +#define SWAPCACHE_WAIT_TABLE_SIZE (1 << SWAPCACHE_WAIT_TABLE_BITS)
> > +static wait_queue_head_t swapcache_wqs[SWAPCACHE_WAIT_TABLE_SIZE];
> > +
> > +static int __init swapcache_wqs_init(void)
> > +{
> > +        for (int i = 0; i < SWAPCACHE_WAIT_TABLE_SIZE; i++)
> > +                init_waitqueue_head(&swapcache_wqs[i]);
> > +
> > +        return 0;
> > +}
> > +late_initcall(swapcache_wqs_init);
> > +
> >  /*
> >   * We enter with non-exclusive mmap_lock (to exclude vma changes,
> >   * but allow concurrent faults), and pte mapped but not yet locked.
> > @@ -4204,6 +4221,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  {
> >          struct vm_area_struct *vma = vmf->vma;
> >          struct folio *swapcache, *folio = NULL;
> > +        DECLARE_WAITQUEUE(wait, current);
> > +        wait_queue_head_t *swapcache_wq;
> >          struct page *page;
> >          struct swap_info_struct *si = NULL;
> >          rmap_t rmap_flags = RMAP_NONE;
> > @@ -4297,12 +4316,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >                                   * undetectable as pte_same() returns true due
> >                                   * to entry reuse.
> >                                   */
> > +                                swapcache_wq = &swapcache_wqs[hash_long(vmf->address & PMD_MASK,
> > +                                                SWAPCACHE_WAIT_TABLE_BITS)];
> >                                  if (swapcache_prepare(entry, nr_pages)) {
> >                                          /*
> >                                           * Relax a bit to prevent rapid
> >                                           * repeated page faults.
> >                                           */
> > +                                        add_wait_queue(swapcache_wq, &wait);
> >                                          schedule_timeout_uninterruptible(1);
> > +                                        remove_wait_queue(swapcache_wq, &wait);
> >                                          goto out_page;
> >                                  }
> >                                  need_clear_cache = true;
> > @@ -4609,8 +4632,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >                  pte_unmap_unlock(vmf->pte, vmf->ptl);
> >  out:
> >          /* Clear the swap cache pin for direct swapin after PTL unlock */
> > -        if (need_clear_cache)
> > +        if (need_clear_cache) {
> >                  swapcache_clear(si, entry, nr_pages);
> > +                wake_up(swapcache_wq);
> > +        }
> >          if (si)
> >                  put_swap_device(si);
> >          return ret;
> > @@ -4625,8 +4650,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >                  folio_unlock(swapcache);
> >                  folio_put(swapcache);
> >          }
> > -        if (need_clear_cache)
> > +        if (need_clear_cache) {
> >                  swapcache_clear(si, entry, nr_pages);
> > +                wake_up(swapcache_wq);
> > +        }
> >          if (si)
> >                  put_swap_device(si);
> >          return ret;
> > --
> Best Regards,
> Huang, Ying
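For readers following the quoted v1: the bucket selection hashes the
faulting address, masked to its PMD-sized region, into one of 32 queues,
so concurrent swap-ins on unrelated regions do not wake each other. A
hypothetical userspace sketch of just that selection step follows; the
2 MiB PMD_SIZE, the hash constant, and the wq_bucket() name are
assumptions for illustration, and the kernel's real hash_long() differs
in detail.

#include <stdint.h>
#include <stdio.h>

#define WAIT_TABLE_BITS 5
#define WAIT_TABLE_SIZE (1u << WAIT_TABLE_BITS) /* 32 buckets */
#define PMD_SIZE        (2ULL << 20)            /* assume 2 MiB PMD coverage */
#define PMD_MASK        (~(PMD_SIZE - 1))

/* Multiplicative hash; a simplified stand-in for the kernel's hash_long() */
static unsigned int wq_bucket(uint64_t fault_address)
{
        return (unsigned int)(((fault_address & PMD_MASK) *
                               0x9E3779B97F4A7C15ULL) >> (64 - WAIT_TABLE_BITS));
}

int main(void)
{
        /* Two faults inside the same 2 MiB region share a bucket... */
        printf("%u %u\n", wq_bucket(0x7f0000201000ULL),
                          wq_bucket(0x7f00002ff000ULL));
        /* ...while a fault on a distant region usually lands elsewhere. */
        printf("%u\n", wq_bucket(0x7f0040000000ULL));
        return 0;
}

Collisions between unrelated regions are harmless for correctness; they
cost at most a spurious wakeup, which is why a small 32-entry table
(5 bits) is enough.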