Re: [PATCH RFC 23/39] mm/rmap: introduce folio_remove_rmap_[pte|ptes|pmd]()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 04/12/2023 14:21, David Hildenbrand wrote:
> Let's mimic what we did with folio_add_file_rmap_*() and
> folio_add_anon_rmap_*() so we can similarly replace page_remove_rmap()
> next.
> 
> Make the compiler always special-case on the granularity by using
> __always_inline.
> 
> We're adding folio_remove_rmap_ptes() handling right away, as we want to
> use that soon for batching rmap operations when unmapping PTE-mapped
> large folios.
> 
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
>  include/linux/rmap.h |  6 ++++
>  mm/rmap.c            | 76 ++++++++++++++++++++++++++++++++++++--------
>  2 files changed, 68 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 017b216915f19..dd4ffb1d8ae04 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -241,6 +241,12 @@ void folio_add_file_rmap_pmd(struct folio *, struct page *,
>  		struct vm_area_struct *);
>  void page_remove_rmap(struct page *, struct vm_area_struct *,
>  		bool compound);
> +void folio_remove_rmap_ptes(struct folio *, struct page *, unsigned int nr,
> +		struct vm_area_struct *);
> +#define folio_remove_rmap_pte(folio, page, vma) \
> +	folio_remove_rmap_ptes(folio, page, 1, vma)
> +void folio_remove_rmap_pmd(struct folio *, struct page *,
> +		struct vm_area_struct *);
>  
>  void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
>  		unsigned long address, rmap_t flags);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3587225055c5e..50b6909157ac1 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1463,25 +1463,36 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  		bool compound)
>  {
>  	struct folio *folio = page_folio(page);
> +
> +	if (likely(!compound))
> +		folio_remove_rmap_pte(folio, page, vma);
> +	else
> +		folio_remove_rmap_pmd(folio, page, vma);
> +}
> +
> +static __always_inline void __folio_remove_rmap(struct folio *folio,
> +		struct page *page, unsigned int nr_pages,
> +		struct vm_area_struct *vma, enum rmap_mode mode)
> +{
>  	atomic_t *mapped = &folio->_nr_pages_mapped;
> -	int nr = 0, nr_pmdmapped = 0;
> -	bool last;
> +	int last, nr = 0, nr_pmdmapped = 0;

nit: you're being inconsistent across the functions with signed vs unsigned for
page counts (e.g. nr, nr_pmdmapped) - see __folio_add_rmap(),
__folio_add_file_rmap(), __folio_add_anon_rmap().

I suggest pick one and stick to it. Personally I'd go with signed int (since
that's what all the counters in struct folio that we are manipulating are,
underneath the atomic_t) then check that nr_pages > 0 in
__folio_rmap_sanity_checks().

>  	enum node_stat_item idx;
>  
> -	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> -	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
> +	__folio_rmap_sanity_checks(folio, page, nr_pages, mode);
>  
>  	/* Is page being unmapped by PTE? Is this its last map to be removed? */
> -	if (likely(!compound)) {
> -		last = atomic_add_negative(-1, &page->_mapcount);
> -		nr = last;
> -		if (last && folio_test_large(folio)) {
> -			nr = atomic_dec_return_relaxed(mapped);
> -			nr = (nr < COMPOUND_MAPPED);
> -		}
> -	} else if (folio_test_pmd_mappable(folio)) {
> -		/* That test is redundant: it's for safety or to optimize out */
> +	if (likely(mode == RMAP_MODE_PTE)) {
> +		do {
> +			last = atomic_add_negative(-1, &page->_mapcount);
> +			if (last && folio_test_large(folio)) {
> +				last = atomic_dec_return_relaxed(mapped);
> +				last = (last < COMPOUND_MAPPED);
> +			}
>  
> +			if (last)
> +				nr++;
> +		} while (page++, --nr_pages > 0);
> +	} else if (mode == RMAP_MODE_PMD) {
>  		last = atomic_add_negative(-1, &folio->_entire_mapcount);
>  		if (last) {
>  			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
> @@ -1517,7 +1528,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  		 * is still mapped.
>  		 */
>  		if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))

folio_test_pmd_mappable() -> folio_test_large()

Since you're converting this to support batch PTE removal, it might as well also
support smaller-than-pmd too?

I currently have a patch to do this same change in the multi-size THP series.


> -			if (!compound || nr < nr_pmdmapped)
> +			if (mode == RMAP_MODE_PTE || nr < nr_pmdmapped)
>  				deferred_split_folio(folio);
>  	}
>  
> @@ -1532,6 +1543,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  	munlock_vma_folio(folio, vma);
>  }
>  
> +/**
> + * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
> + * @folio:	The folio to remove the mappings from
> + * @page:	The first page to remove
> + * @nr_pages:	The number of pages that will be removed from the mapping
> + * @vma:	The vm area from which the mappings are removed
> + *
> + * The page range of the folio is defined by [page, page + nr_pages)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
> +		unsigned int nr_pages, struct vm_area_struct *vma)
> +{
> +	__folio_remove_rmap(folio, page, nr_pages, vma, RMAP_MODE_PTE);
> +}
> +
> +/**
> + * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
> + * @folio:	The folio to remove the mapping from
> + * @page:	The first page to remove
> + * @vma:	The vm area from which the mapping is removed
> + *
> + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
> + *
> + * The caller needs to hold the page table lock.
> + */
> +void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
> +		struct vm_area_struct *vma)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	__folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_MODE_PMD);
> +#else
> +	WARN_ON_ONCE(true);
> +#endif
> +}
> +
>  /*
>   * @arg: enum ttu_flags will be passed to this argument
>   */





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux