On 25/08/2023 14:59, Matthew Wilcox (Oracle) wrote:
> Iterate over a folio_batch rather than a linked list. This is
> easier for the CPU to prefetch and has a batch count naturally
> built in so we don't need to track it.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  mm/internal.h   |  5 +++--
>  mm/page_alloc.c | 59 ++++++++++++++++++++++++++++++-------------------
>  2 files changed, 39 insertions(+), 25 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 7499b5ea1cf6..5c6a53371aeb 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -441,8 +441,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
>                                          gfp_t gfp_flags);
>  extern int user_min_free_kbytes;
>
> -extern void free_unref_page(struct page *page, unsigned int order);
> -extern void free_unref_page_list(struct list_head *list);
> +void free_unref_page(struct page *page, unsigned int order);
> +void free_unref_folios(struct folio_batch *fbatch);
> +void free_unref_page_list(struct list_head *list);
>
>  extern void zone_pcp_reset(struct zone *zone);
>  extern void zone_pcp_disable(struct zone *zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f1ee96fd9bef..bca5c70b5576 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -32,6 +32,7 @@
>  #include <linux/sysctl.h>
>  #include <linux/cpu.h>
>  #include <linux/cpuset.h>
> +#include <linux/pagevec.h>
>  #include <linux/memory_hotplug.h>
>  #include <linux/nodemask.h>
>  #include <linux/vmstat.h>
> @@ -2464,57 +2465,51 @@ void free_unref_page(struct page *page, unsigned int order)
>  }
>
>  /*
> - * Free a list of 0-order pages
> + * Free a batch of 0-order pages
>   */
> -void free_unref_page_list(struct list_head *list)
> +void free_unref_folios(struct folio_batch *folios)
>  {
>          unsigned long __maybe_unused UP_flags;
> -        struct folio *folio, *next;
>          struct per_cpu_pages *pcp = NULL;
>          struct zone *locked_zone = NULL;
> -        int batch_count = 0;
> -        int migratetype;
> +        int i, j, migratetype;
>
> -        /* Prepare pages for freeing */
> -        list_for_each_entry_safe(folio, next, list, lru) {
> +        /* Prepare folios for freeing */
> +        for (i = 0, j = 0; i < folios->nr; i++) {
> +                struct folio *folio = folios->folios[i];
>                  unsigned long pfn = folio_pfn(folio);
> -                if (!free_unref_page_prepare(&folio->page, pfn, 0)) {
> -                        list_del(&folio->lru);
> +                if (!free_unref_page_prepare(&folio->page, pfn, 0))
>                          continue;
> -                }
>
>                  /*
> -                 * Free isolated pages directly to the allocator, see
> +                 * Free isolated folios directly to the allocator, see
>                   * comment in free_unref_page.
>                   */
>                  migratetype = get_pcppage_migratetype(&folio->page);
>                  if (unlikely(is_migrate_isolate(migratetype))) {
> -                        list_del(&folio->lru);
>                          free_one_page(folio_zone(folio), &folio->page, pfn,
>                                          0, migratetype, FPI_NONE);
>                          continue;
>                  }
> +                if (j != i)
> +                        folios->folios[j] = folio;
> +                j++;
>          }
> +        folios->nr = j;
>
> -        list_for_each_entry_safe(folio, next, list, lru) {
> +        for (i = 0; i < folios->nr; i++) {
> +                struct folio *folio = folios->folios[i];
>                  struct zone *zone = folio_zone(folio);
>
> -                list_del(&folio->lru);
>                  migratetype = get_pcppage_migratetype(&folio->page);
>
> -                /*
> -                 * Either different zone requiring a different pcp lock or
> -                 * excessive lock hold times when freeing a large list of
> -                 * folios.
> -                 */
> -                if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {

Same comment as for release_pages(): the batch count is effectively
halved. Does this have perf implications? I guess you'll need to
benchmark...
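To put a rough number on the halving (userspace-only sketch, not kernel
code; it assumes SWAP_CLUSTER_MAX is still 32 and PAGEVEC_SIZE is still
15, which is how I read swap.h and pagevec.h today):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32     /* old in-loop re-batch limit */
#define PAGEVEC_SIZE     15     /* folio_batch capacity */

int main(void)
{
        unsigned long nr_folios = 1024;         /* arbitrary example */

        /* old loop: pcp lock dropped every SWAP_CLUSTER_MAX folios */
        unsigned long old_cycles =
                (nr_folios + SWAP_CLUSTER_MAX - 1) / SWAP_CLUSTER_MAX;
        /* new code: at most one full folio_batch per free_unref_folios() */
        unsigned long new_cycles =
                (nr_folios + PAGEVEC_SIZE - 1) / PAGEVEC_SIZE;

        printf("old: %lu pcp lock/unlock cycles, new: %lu\n",
               old_cycles, new_cycles);
        return 0;
}

i.e. roughly twice as many pcp lock/unlock cycles for a large
single-zone free, hence the benchmarking question.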
> +                /* Different zone requires a different pcp lock */
> +                if (zone != locked_zone) {
>                          if (pcp) {
>                                  pcp_spin_unlock(pcp);
>                                  pcp_trylock_finish(UP_flags);
>                          }
>
> -                        batch_count = 0;
> -
>                          /*
>                           * trylock is necessary as folios may be getting freed
>                           * from IRQ or SoftIRQ context after an IO completion.
> @@ -2541,13 +2536,31 @@ void free_unref_page_list(struct list_head *list)
>
>                  trace_mm_page_free_batched(&folio->page);
>                  free_unref_page_commit(zone, pcp, &folio->page, migratetype, 0);
> -                batch_count++;
>          }
>
>          if (pcp) {
>                  pcp_spin_unlock(pcp);
>                  pcp_trylock_finish(UP_flags);
>          }
> +        folios->nr = 0;

Same nits as for previous patch: Better to use the APIs rather than
manipulate the internal folio_batch state directly?

> +}
> +
> +void free_unref_page_list(struct list_head *list)
> +{
> +        struct folio_batch fbatch;
> +
> +        folio_batch_init(&fbatch);
> +        while (!list_empty(list)) {
> +                struct folio *folio = list_first_entry(list, struct folio, lru);
> +
> +                list_del(&folio->lru);
> +                if (folio_batch_add(&fbatch, folio) > 0)
> +                        continue;
> +                free_unref_folios(&fbatch);
> +        }
> +
> +        if (fbatch.nr)
> +                free_unref_folios(&fbatch);
>  }
>
>  /*
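To make the API nit concrete, this is the sort of thing I had in mind
(untested sketch on top of this patch; it assumes folio_batch_count()
and folio_batch_reinit() are acceptable here, and the in-place
compaction via folios->folios[j] / folios->nr = j in the prepare loop
would of course still have to touch the internals):

-       for (i = 0, j = 0; i < folios->nr; i++) {
+       for (i = 0, j = 0; i < folio_batch_count(folios); i++) {
...
-       for (i = 0; i < folios->nr; i++) {
+       for (i = 0; i < folio_batch_count(folios); i++) {
...
-       folios->nr = 0;
+       folio_batch_reinit(folios);

and in free_unref_page_list():

-       if (fbatch.nr)
+       if (folio_batch_count(&fbatch))
                free_unref_folios(&fbatch);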