Re: [PATCHv2 6/8] khugepaged: Allow to collapse PTE-mapped compound pages

On 4/3/20 4:29 AM, Kirill A. Shutemov wrote:
We can collapse PTE-mapped compound pages. We only need to avoid
handling them more than once: lock/unlock the page only once if it is
present in the PMD range multiple times, as it is handled at the
compound level. The same goes for LRU isolation and putback.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
  mm/khugepaged.c | 103 ++++++++++++++++++++++++++++++++----------------
  1 file changed, 68 insertions(+), 35 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 1e7e6543ebca..49e56e4e30d1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -515,23 +515,37 @@ void __khugepaged_exit(struct mm_struct *mm)

  static void release_pte_page(struct page *page)
  {
-	dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page));
+	mod_node_page_state(page_pgdat(page),
+			NR_ISOLATED_ANON + page_is_file_cache(page),
+			-compound_nr(page));
  	unlock_page(page);
  	putback_lru_page(page);
  }

-static void release_pte_pages(pte_t *pte, pte_t *_pte)
+static void release_pte_pages(pte_t *pte, pte_t *_pte,
+		struct list_head *compound_pagelist)
  {
+	struct page *page, *tmp;
+
  	while (--_pte >= pte) {
  		pte_t pteval = *_pte;
-		if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval)))
-			release_pte_page(pte_page(pteval));
+
+		page = pte_page(pteval);
+		if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval)) &&
+				!PageCompound(page))
+			release_pte_page(page);
+	}
+
+	list_for_each_entry_safe(page, tmp, compound_pagelist, lru) {
+		list_del(&page->lru);
+		release_pte_page(page);
  	}
  }

  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
  					unsigned long address,
-					pte_t *pte)
+					pte_t *pte,
+					struct list_head *compound_pagelist)
  {
  	struct page *page = NULL;
  	pte_t *_pte;
@@ -561,13 +575,21 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
  			goto out;
  		}

-		/* TODO: teach khugepaged to collapse THP mapped with pte */
+		VM_BUG_ON_PAGE(!PageAnon(page), page);
+
  		if (PageCompound(page)) {
-			result = SCAN_PAGE_COMPOUND;
-			goto out;
-		}
+			struct page *p;
+			page = compound_head(page);

-		VM_BUG_ON_PAGE(!PageAnon(page), page);
+			/*
+			 * Check if we have dealt with the compound page
+			 * already
+			 */
+			list_for_each_entry(p, compound_pagelist, lru) {
+				if (page == p)
+					goto next;
+			}
+		}

  		/*
  		 * We can do it before isolate_lru_page because the
@@ -597,19 +619,15 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
  			result = SCAN_PAGE_COUNT;
  			goto out;
  		}
-		if (pte_write(pteval)) {
-			writable = true;
-		} else {
-			if (PageSwapCache(page) &&
-			    !reuse_swap_page(page, NULL)) {
-				unlock_page(page);
-				result = SCAN_SWAP_CACHE_PAGE;
-				goto out;
-			}
+		if (!pte_write(pteval) && PageSwapCache(page) &&
+				!reuse_swap_page(page, NULL)) {
  			/*
-			 * Page is not in the swap cache. It can be collapsed
-			 * into a THP.
+			 * Page is in the swap cache and cannot be re-used.
+			 * It cannot be collapsed into a THP.
  			 */
+			unlock_page(page);
+			result = SCAN_SWAP_CACHE_PAGE;
+			goto out;
  		}

  		/*
@@ -621,16 +639,23 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
  			result = SCAN_DEL_PAGE_LRU;
  			goto out;
  		}
-		inc_node_page_state(page,
-				NR_ISOLATED_ANON + page_is_file_cache(page));
+		mod_node_page_state(page_pgdat(page),
+				NR_ISOLATED_ANON + page_is_file_cache(page),
+				compound_nr(page));
  		VM_BUG_ON_PAGE(!PageLocked(page), page);
  		VM_BUG_ON_PAGE(PageLRU(page), page);

+		if (PageCompound(page))
+			list_add_tail(&page->lru, compound_pagelist);
+next:
  		/* There should be enough young pte to collapse the page */
  		if (pte_young(pteval) ||
  		    page_is_young(page) || PageReferenced(page) ||
  		    mmu_notifier_test_young(vma->vm_mm, address))
  			referenced++;
+
+		if (pte_write(pteval))
+			writable = true;
  	}
  	if (likely(writable)) {
  		if (likely(referenced)) {
@@ -644,7 +669,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
  	}

  out:
-	release_pte_pages(pte, _pte);
+	release_pte_pages(pte, _pte, compound_pagelist);
  	trace_mm_collapse_huge_page_isolate(page, none_or_zero,
  					    referenced, writable, result);
  	return 0;
@@ -653,13 +678,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
  static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
  				      struct vm_area_struct *vma,
  				      unsigned long address,
-				      spinlock_t *ptl)
+				      spinlock_t *ptl,
+				      struct list_head *compound_pagelist)
  {
+	struct page *src_page, *tmp;
  	pte_t *_pte;
  	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
  				_pte++, page++, address += PAGE_SIZE) {
  		pte_t pteval = *_pte;
-		struct page *src_page;

  		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
  			clear_user_highpage(page, address);
@@ -679,7 +705,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
  		} else {
  			src_page = pte_page(pteval);
  			copy_user_highpage(page, src_page, address, vma);
-			release_pte_page(src_page);
  			/*
  			 * ptl mostly unnecessary, but preempt has to
  			 * be disabled to update the per-cpu stats
@@ -693,9 +718,18 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
  			pte_clear(vma->vm_mm, address, _pte);
  			page_remove_rmap(src_page, false);
  			spin_unlock(ptl);
-			free_page_and_swap_cache(src_page);
+			if (!PageCompound(src_page)) {
+				release_pte_page(src_page);
+				free_page_and_swap_cache(src_page);
+			}
  		}
  	}
+
+	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
+		list_del(&src_page->lru);
+		release_pte_page(src_page);
+		free_page_and_swap_cache(src_page);
+	}

It looks like this may mess up the PTE-mapped THP's refcount if the THP has multiple PTE-mapped subpages, since put_page() is not called for every PTE-mapped subpage.
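
For example, if four subpages of a THP are PTE-mapped in this range,
page_remove_rmap() runs four times but free_page_and_swap_cache() runs
only once for the compound page, so it seems three references are never
dropped. Something like the below (untested, just to illustrate the
idea) would drop one reference per PTE mapping and keep the
unlock/putback once per compound page:

		} else {
			src_page = pte_page(pteval);
			copy_user_highpage(page, src_page, address, vma);
			if (!PageCompound(src_page))
				release_pte_page(src_page);
			spin_lock(ptl);
			pte_clear(vma->vm_mm, address, _pte);
			page_remove_rmap(src_page, false);
			spin_unlock(ptl);
			/* one put_page() per PTE mapping, compound or not */
			free_page_and_swap_cache(src_page);
		}
	}

	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
		list_del(&src_page->lru);
		/* unlock and put back on the LRU once per compound page */
		release_pte_page(src_page);
	}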

  }

  static void khugepaged_alloc_sleep(void)
@@ -960,6 +994,7 @@ static void collapse_huge_page(struct mm_struct *mm,
  				   struct page **hpage,
  				   int node, int referenced)
  {
+	LIST_HEAD(compound_pagelist);
  	pmd_t *pmd, _pmd;
  	pte_t *pte;
  	pgtable_t pgtable;
@@ -1059,7 +1094,8 @@ static void collapse_huge_page(struct mm_struct *mm,
  	mmu_notifier_invalidate_range_end(&range);

  	spin_lock(pte_ptl);
-	isolated = __collapse_huge_page_isolate(vma, address, pte);
+	isolated = __collapse_huge_page_isolate(vma, address, pte,
+			&compound_pagelist);
  	spin_unlock(pte_ptl);

  	if (unlikely(!isolated)) {
@@ -1084,7 +1120,8 @@ static void collapse_huge_page(struct mm_struct *mm,
  	 */
  	anon_vma_unlock_write(vma->anon_vma);

-	__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl);
+	__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl,
+			&compound_pagelist);
  	pte_unmap(pte);
  	__SetPageUptodate(new_page);
  	pgtable = pmd_pgtable(_pmd);
@@ -1181,11 +1218,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
  			goto out_unmap;
  		}

-		/* TODO: teach khugepaged to collapse THP mapped with pte */
-		if (PageCompound(page)) {
-			result = SCAN_PAGE_COMPOUND;
-			goto out_unmap;
-		}
+		page = compound_head(page);

  		/*
  		 * Record which node the original page is from and save this




