On 25/08/2023 14:59, Matthew Wilcox (Oracle) wrote: > By making release_pages() call folios_put(), we can get rid of the calls > to compound_head() for the callers that already know they have folios. > We can also get rid of the lock_batch tracking as we know the size of > the batch is limited by folio_batch. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> > --- > include/linux/mm.h | 19 ++--------- > mm/mlock.c | 3 +- > mm/swap.c | 84 +++++++++++++++++++++++++++------------------- > 3 files changed, 52 insertions(+), 54 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 2c6b54b5506a..7d1d96b75d11 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -36,6 +36,7 @@ struct anon_vma; > struct anon_vma_chain; > struct user_struct; > struct pt_regs; > +struct folio_batch; > > extern int sysctl_page_lock_unfairness; > > @@ -1521,23 +1522,7 @@ typedef union { > } release_pages_arg __attribute__ ((__transparent_union__)); > > void release_pages(release_pages_arg, int nr); > - > -/** > - * folios_put - Decrement the reference count on an array of folios. > - * @folios: The folios. > - * @nr: How many folios there are. > - * > - * Like folio_put(), but for an array of folios. This is more efficient > - * than writing the loop yourself as it will optimise the locks which > - * need to be taken if the folios are freed. > - * > - * Context: May be called in process or interrupt context, but not in NMI > - * context. May be called while holding a spinlock. 
> - */ > -static inline void folios_put(struct folio **folios, unsigned int nr) > -{ > - release_pages(folios, nr); > -} > +void folios_put(struct folio_batch *folios); > > static inline void put_page(struct page *page) > { > diff --git a/mm/mlock.c b/mm/mlock.c > index 06bdfab83b58..67bd74a6268a 100644 > --- a/mm/mlock.c > +++ b/mm/mlock.c > @@ -206,8 +206,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch) > > if (lruvec) > unlock_page_lruvec_irq(lruvec); > - folios_put(fbatch->folios, folio_batch_count(fbatch)); > - folio_batch_reinit(fbatch); > + folios_put(fbatch); > } > > void mlock_drain_local(void) > diff --git a/mm/swap.c b/mm/swap.c > index cd8f0150ba3a..7bdc63b56859 100644 > --- a/mm/swap.c > +++ b/mm/swap.c > @@ -89,7 +89,7 @@ static void __page_cache_release(struct folio *folio) > __folio_clear_lru_flags(folio); > unlock_page_lruvec_irqrestore(lruvec, flags); > } > - /* See comment on folio_test_mlocked in release_pages() */ > + /* See comment on folio_test_mlocked in folios_put() */ > if (unlikely(folio_test_mlocked(folio))) { > long nr_pages = folio_nr_pages(folio); > > @@ -175,7 +175,7 @@ static void lru_add_fn(struct lruvec *lruvec, struct folio *folio) > * while the LRU lock is held. > * > * (That is not true of __page_cache_release(), and not necessarily > - * true of release_pages(): but those only clear the mlocked flag after > + * true of folios_put(): but those only clear the mlocked flag after > * folio_put_testzero() has excluded any other users of the folio.) 
> */ > if (folio_evictable(folio)) { > @@ -221,8 +221,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) > > if (lruvec) > unlock_page_lruvec_irqrestore(lruvec, flags); > - folios_put(fbatch->folios, folio_batch_count(fbatch)); > - folio_batch_reinit(fbatch); > + folios_put(fbatch); > } > > static void folio_batch_add_and_move(struct folio_batch *fbatch, > @@ -946,41 +945,27 @@ void lru_cache_disable(void) > } > > /** > - * release_pages - batched put_page() > - * @arg: array of pages to release > - * @nr: number of pages > + * folios_put - Decrement the reference count on a batch of folios. > + * @folios: The folios. > * > - * Decrement the reference count on all the pages in @arg. If it > - * fell to zero, remove the page from the LRU and free it. > + * Like folio_put(), but for a batch of folios. This is more efficient > + * than writing the loop yourself as it will optimise the locks which need > + * to be taken if the folios are freed. The folios batch is returned > + * empty and ready to be reused for another batch; there is no need to > + * reinitialise it. > * > - * Note that the argument can be an array of pages, encoded pages, > - * or folio pointers. We ignore any encoded bits, and turn any of > - * them into just a folio that gets free'd. > + * Context: May be called in process or interrupt context, but not in NMI > + * context. May be called while holding a spinlock. 
> */ > -void release_pages(release_pages_arg arg, int nr) > +void folios_put(struct folio_batch *folios) > { > int i; > - struct encoded_page **encoded = arg.encoded_pages; > LIST_HEAD(pages_to_free); > struct lruvec *lruvec = NULL; > unsigned long flags = 0; > - unsigned int lock_batch; > > - for (i = 0; i < nr; i++) { > - struct folio *folio; > - > - /* Turn any of the argument types into a folio */ > - folio = page_folio(encoded_page_ptr(encoded[i])); > - > - /* > - * Make sure the IRQ-safe lock-holding time does not get > - * excessive with a continuous string of pages from the > - * same lruvec. The lock is held only if lruvec != NULL. > - */ > - if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) { SWAP_CLUSTER_MAX is 32. By using the folio_batch, I think you are limited to 15 in your batch, so I guess you could be taking/releasing the lock twice as often? Is there any perf implication? > - unlock_page_lruvec_irqrestore(lruvec, flags); > - lruvec = NULL; > - } > + for (i = 0; i < folios->nr; i++) { > + struct folio *folio = folios->folios[i]; > > if (is_huge_zero_page(&folio->page)) > continue; > @@ -1010,13 +995,8 @@ void release_pages(release_pages_arg arg, int nr) > } > > if (folio_test_lru(folio)) { > - struct lruvec *prev_lruvec = lruvec; > - > lruvec = folio_lruvec_relock_irqsave(folio, lruvec, > &flags); > - if (prev_lruvec != lruvec) > - lock_batch = 0; > - > lruvec_del_folio(lruvec, folio); > __folio_clear_lru_flags(folio); > } > @@ -1040,6 +1020,40 @@ void release_pages(release_pages_arg arg, int nr) > > mem_cgroup_uncharge_list(&pages_to_free); > free_unref_page_list(&pages_to_free); > + folios->nr = 0; folio_batch_reinit(folios) ? > +} > +EXPORT_SYMBOL(folios_put); > + > +/** > + * release_pages - batched put_page() > + * @arg: array of pages to release > + * @nr: number of pages > + * > + * Decrement the reference count on all the pages in @arg. If it > + * fell to zero, remove the page from the LRU and free it. 
> + * > + * Note that the argument can be an array of pages, encoded pages, > + * or folio pointers. We ignore any encoded bits, and turn any of > + * them into just a folio that gets free'd. > + */ > +void release_pages(release_pages_arg arg, int nr) > +{ > + struct folio_batch fbatch; > + struct encoded_page **encoded = arg.encoded_pages; > + int i; > + > + folio_batch_init(&fbatch); > + for (i = 0; i < nr; i++) { > + /* Turn any of the argument types into a folio */ > + struct folio *folio = page_folio(encoded_page_ptr(encoded[i])); > + > + if (folio_batch_add(&fbatch, folio) > 0) > + continue; > + folios_put(&fbatch); > + } > + > + if (fbatch.nr) if (folio_batch_count(&fbatch)) ? > + folios_put(&fbatch); > } > EXPORT_SYMBOL(release_pages); >