On 25/08/2023 14:59, Matthew Wilcox (Oracle) wrote:
> Iterate over a folio_batch rather than a linked list. This is
> easier for the CPU to prefetch and has a batch count naturally
> built in so we don't need to track it.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  mm/internal.h   |  5 +++--
>  mm/page_alloc.c | 59 ++++++++++++++++++++++++++++++-------------------
>  2 files changed, 39 insertions(+), 25 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 7499b5ea1cf6..5c6a53371aeb 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -441,8 +441,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
>                                          gfp_t gfp_flags);
>  extern int user_min_free_kbytes;
>
> -extern void free_unref_page(struct page *page, unsigned int order);
> -extern void free_unref_page_list(struct list_head *list);
> +void free_unref_page(struct page *page, unsigned int order);
> +void free_unref_folios(struct folio_batch *fbatch);
> +void free_unref_page_list(struct list_head *list);
>
>  extern void zone_pcp_reset(struct zone *zone);
>  extern void zone_pcp_disable(struct zone *zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f1ee96fd9bef..bca5c70b5576 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -32,6 +32,7 @@
>  #include <linux/sysctl.h>
>  #include <linux/cpu.h>
>  #include <linux/cpuset.h>
> +#include <linux/pagevec.h>
>  #include <linux/memory_hotplug.h>
>  #include <linux/nodemask.h>
>  #include <linux/vmstat.h>
> @@ -2464,57 +2465,51 @@ void free_unref_page(struct page *page, unsigned int order)
>  }
>
>  /*
> - * Free a list of 0-order pages
> + * Free a batch of 0-order pages
>   */
> -void free_unref_page_list(struct list_head *list)
> +void free_unref_folios(struct folio_batch *folios)
>  {
>          unsigned long __maybe_unused UP_flags;
> -        struct folio *folio, *next;
>          struct per_cpu_pages *pcp = NULL;
>          struct zone *locked_zone = NULL;
> -        int batch_count = 0;
> -        int migratetype;
> +        int i, j, migratetype;
>
> -        /* Prepare pages for freeing */
> -        list_for_each_entry_safe(folio, next, list, lru) {
> +        /* Prepare folios for freeing */
> +        for (i = 0, j = 0; i < folios->nr; i++) {
> +                struct folio *folio = folios->folios[i];
>                  unsigned long pfn = folio_pfn(folio);
> -                if (!free_unref_page_prepare(&folio->page, pfn, 0)) {
> -                        list_del(&folio->lru);
> +                if (!free_unref_page_prepare(&folio->page, pfn, 0))
>                          continue;
> -                }
>
>                  /*
> -                 * Free isolated pages directly to the allocator, see
> +                 * Free isolated folios directly to the allocator, see
>                   * comment in free_unref_page.
>                   */
>                  migratetype = get_pcppage_migratetype(&folio->page);
>                  if (unlikely(is_migrate_isolate(migratetype))) {
> -                        list_del(&folio->lru);
>                          free_one_page(folio_zone(folio), &folio->page, pfn,
>                                          0, migratetype, FPI_NONE);
>                          continue;
>                  }
> +                if (j != i)
> +                        folios->folios[j] = folio;
> +                j++;
>          }
> +        folios->nr = j;
>
> -        list_for_each_entry_safe(folio, next, list, lru) {
> +        for (i = 0; i < folios->nr; i++) {
> +                struct folio *folio = folios->folios[i];
>                  struct zone *zone = folio_zone(folio);
>
> -                list_del(&folio->lru);
>                  migratetype = get_pcppage_migratetype(&folio->page);
>
> -                /*
> -                 * Either different zone requiring a different pcp lock or
> -                 * excessive lock hold times when freeing a large list of
> -                 * folios.
> -                 */
> -                if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {

Same comment as for release_pages(): the batch count is effectively
halved. Does this have perf implications? I guess you'll need to
benchmark...
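To put a rough number on the halving (userspace-only sketch, not kernel
code; it assumes SWAP_CLUSTER_MAX is still 32 and PAGEVEC_SIZE is still
15, which is how I read swap.h and pagevec.h today):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32     /* old in-loop re-batch limit */
#define PAGEVEC_SIZE     15     /* folio_batch capacity */

int main(void)
{
        unsigned long nr_folios = 1024;         /* arbitrary example */

        /* old loop: pcp lock dropped every SWAP_CLUSTER_MAX folios */
        unsigned long old_cycles =
                (nr_folios + SWAP_CLUSTER_MAX - 1) / SWAP_CLUSTER_MAX;
        /* new code: at most one full folio_batch per free_unref_folios() */
        unsigned long new_cycles =
                (nr_folios + PAGEVEC_SIZE - 1) / PAGEVEC_SIZE;

        printf("old: %lu pcp lock/unlock cycles, new: %lu\n",
               old_cycles, new_cycles);
        return 0;
}

i.e. roughly twice as many pcp lock/unlock cycles for a large
single-zone free, hence the benchmarking question.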
> +                /* Different zone requires a different pcp lock */
> +                if (zone != locked_zone) {
>                          if (pcp) {
>                                  pcp_spin_unlock(pcp);
>                                  pcp_trylock_finish(UP_flags);
>                          }
>
> -                        batch_count = 0;
> -
>                          /*
>                           * trylock is necessary as folios may be getting freed
>                           * from IRQ or SoftIRQ context after an IO completion.
> @@ -2541,13 +2536,31 @@ void free_unref_page_list(struct list_head *list)
>
>                  trace_mm_page_free_batched(&folio->page);
>                  free_unref_page_commit(zone, pcp, &folio->page, migratetype, 0);
> -                batch_count++;
>          }
>
>          if (pcp) {
>                  pcp_spin_unlock(pcp);
>                  pcp_trylock_finish(UP_flags);
>          }
> +        folios->nr = 0;

Same nits as for previous patch: Better to use the APIs rather than
manipulate the internal folio_batch state directly?

> +}
> +
> +void free_unref_page_list(struct list_head *list)
> +{
> +        struct folio_batch fbatch;
> +
> +        folio_batch_init(&fbatch);
> +        while (!list_empty(list)) {
> +                struct folio *folio = list_first_entry(list, struct folio, lru);
> +
> +                list_del(&folio->lru);
> +                if (folio_batch_add(&fbatch, folio) > 0)
> +                        continue;
> +                free_unref_folios(&fbatch);
> +        }
> +
> +        if (fbatch.nr)
> +                free_unref_folios(&fbatch);
>  }
>
>  /*
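To make the API nit concrete, this is the sort of thing I had in mind
(untested sketch on top of this patch; it assumes folio_batch_count()
and folio_batch_reinit() are acceptable here, and the in-place
compaction via folios->folios[j] / folios->nr = j in the prepare loop
would of course still have to touch the internals):

-       for (i = 0, j = 0; i < folios->nr; i++) {
+       for (i = 0, j = 0; i < folio_batch_count(folios); i++) {
...
-       for (i = 0; i < folios->nr; i++) {
+       for (i = 0; i < folio_batch_count(folios); i++) {
...
-       folios->nr = 0;
+       folio_batch_reinit(folios);

and in free_unref_page_list():

-       if (fbatch.nr)
+       if (folio_batch_count(&fbatch))
                free_unref_folios(&fbatch);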