[RFC PATCH 04/14] mm/khugepaged: separate khugepaged_scan_pmd() scan and collapse

khugepaged_scan_pmd() currently does two things: (1) scan the pmd to
see if it is suitable for collapse, then (2) do the collapse if the
scan succeeds.

Separate out (1) into a new helper, scan_pmd(), so that it can be
reused on its own later in the series, and introduce struct
scan_pmd_result to gather the data collected during the scan.
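
For reference, the intended calling pattern after this change is
roughly the following (a simplified sketch of the code added below,
with tracing omitted):

	struct scan_pmd_result scan_result = {};

	scan_pmd(mm, vma, address, cc, &scan_result);
	if (scan_result.result == SCAN_SUCCEED) {
		/* pick the target node recorded in cc->node_load[] */
		node = khugepaged_find_target_node(cc);
		/* collapse_huge_page() returns with mmap_lock released */
		collapse_huge_page(mm, address, hpage, node,
				   scan_result.referenced,
				   scan_result.unmapped,
				   cc->enforce_pte_scan_limits);
	}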

Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>
---
 mm/khugepaged.c | 107 ++++++++++++++++++++++++++++++------------------
 1 file changed, 67 insertions(+), 40 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e3399a451662..b204bc1eefa7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1244,27 +1244,34 @@ static void collapse_huge_page(struct mm_struct *mm,
 	return;
 }
 
-static int khugepaged_scan_pmd(struct mm_struct *mm,
-			       struct vm_area_struct *vma,
-			       unsigned long address,
-			       struct page **hpage,
-			       struct collapse_control *cc)
+struct scan_pmd_result {
+	int result;
+	bool writable;
+	int referenced;
+	int unmapped;
+	int none_or_zero;
+	struct page *head;
+};
+
+static void scan_pmd(struct mm_struct *mm,
+		     struct vm_area_struct *vma,
+		     unsigned long address,
+		     struct collapse_control *cc,
+		     struct scan_pmd_result *scan_result)
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
-	int ret = 0, result = 0, referenced = 0;
-	int none_or_zero = 0, shared = 0;
+	int shared = 0;
 	struct page *page = NULL;
 	unsigned long _address;
 	spinlock_t *ptl;
-	int node = NUMA_NO_NODE, unmapped = 0;
-	bool writable = false;
+	int node = NUMA_NO_NODE;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
 	pmd = mm_find_pmd(mm, address);
 	if (!pmd) {
-		result = SCAN_PMD_NULL;
+		scan_result->result = SCAN_PMD_NULL;
 		goto out;
 	}
 
@@ -1274,7 +1281,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	     _pte++, _address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
-			if (++unmapped <= khugepaged_max_ptes_swap ||
+			if (++scan_result->unmapped <=
+				    khugepaged_max_ptes_swap ||
 			    !cc->enforce_pte_scan_limits) {
 				/*
 				 * Always be strict with uffd-wp
@@ -1282,23 +1290,24 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 				 * comment below for pte_uffd_wp().
 				 */
 				if (pte_swp_uffd_wp(pteval)) {
-					result = SCAN_PTE_UFFD_WP;
+					scan_result->result = SCAN_PTE_UFFD_WP;
 					goto out_unmap;
 				}
 				continue;
 			} else {
-				result = SCAN_EXCEED_SWAP_PTE;
+				scan_result->result = SCAN_EXCEED_SWAP_PTE;
 				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				goto out_unmap;
 			}
 		}
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
 			if (!userfaultfd_armed(vma) &&
-			    (++none_or_zero <= khugepaged_max_ptes_none ||
+			    (++scan_result->none_or_zero <=
+			     khugepaged_max_ptes_none ||
 			     !cc->enforce_pte_scan_limits)) {
 				continue;
 			} else {
-				result = SCAN_EXCEED_NONE_PTE;
+				scan_result->result = SCAN_EXCEED_NONE_PTE;
 				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 				goto out_unmap;
 			}
@@ -1313,22 +1322,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 			 * userfault messages that falls outside of
 			 * the registered range.  So, just be simple.
 			 */
-			result = SCAN_PTE_UFFD_WP;
+			scan_result->result = SCAN_PTE_UFFD_WP;
 			goto out_unmap;
 		}
 		if (pte_write(pteval))
-			writable = true;
+			scan_result->writable = true;
 
 		page = vm_normal_page(vma, _address, pteval);
 		if (unlikely(!page)) {
-			result = SCAN_PAGE_NULL;
+			scan_result->result = SCAN_PAGE_NULL;
 			goto out_unmap;
 		}
 
 		if (page_mapcount(page) > 1 &&
 				++shared > khugepaged_max_ptes_shared &&
 				cc->enforce_pte_scan_limits) {
-			result = SCAN_EXCEED_SHARED_PTE;
+			scan_result->result = SCAN_EXCEED_SHARED_PTE;
 			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
 			goto out_unmap;
 		}
@@ -1338,25 +1347,25 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		/*
 		 * Record which node the original page is from and save this
 		 * information to cc->node_load[].
-		 * Khugepaged will allocate hugepage from the node has the max
+		 * Caller should allocate hugepage from the node with the max
 		 * hit record.
 		 */
 		node = page_to_nid(page);
 		if (khugepaged_scan_abort(node, cc)) {
-			result = SCAN_SCAN_ABORT;
+			scan_result->result = SCAN_SCAN_ABORT;
 			goto out_unmap;
 		}
 		cc->node_load[node]++;
 		if (!PageLRU(page)) {
-			result = SCAN_PAGE_LRU;
+			scan_result->result = SCAN_PAGE_LRU;
 			goto out_unmap;
 		}
 		if (PageLocked(page)) {
-			result = SCAN_PAGE_LOCK;
+			scan_result->result = SCAN_PAGE_LOCK;
 			goto out_unmap;
 		}
 		if (!PageAnon(page)) {
-			result = SCAN_PAGE_ANON;
+			scan_result->result = SCAN_PAGE_ANON;
 			goto out_unmap;
 		}
 
@@ -1378,35 +1387,53 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		 * will be done again later the risk seems low.
 		 */
 		if (!is_refcount_suitable(page)) {
-			result = SCAN_PAGE_COUNT;
+			scan_result->result = SCAN_PAGE_COUNT;
 			goto out_unmap;
 		}
 		if (pte_young(pteval) ||
 		    page_is_young(page) || PageReferenced(page) ||
 		    mmu_notifier_test_young(vma->vm_mm, address))
-			referenced++;
+			scan_result->referenced++;
 	}
-	if (!writable) {
-		result = SCAN_PAGE_RO;
-	} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
-		result = SCAN_LACK_REFERENCED_PAGE;
+	if (!scan_result->writable) {
+		scan_result->result = SCAN_PAGE_RO;
+	} else if (!scan_result->referenced ||
+		   (scan_result->unmapped &&
+		    scan_result->referenced < HPAGE_PMD_NR / 2)) {
+		scan_result->result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
-		result = SCAN_SUCCEED;
-		ret = 1;
+		scan_result->result = SCAN_SUCCEED;
 	}
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret) {
+out:
+	scan_result->head = page;
+}
+
+static int khugepaged_scan_pmd(struct mm_struct *mm,
+			       struct vm_area_struct *vma,
+			       unsigned long address,
+			       struct page **hpage,
+			       struct collapse_control *cc)
+{
+	int node;
+	struct scan_pmd_result scan_result = {};
+
+	scan_pmd(mm, vma, address, cc, &scan_result);
+	if (scan_result.result == SCAN_SUCCEED) {
 		node = khugepaged_find_target_node(cc);
 		/* collapse_huge_page will return with the mmap_lock released */
-		collapse_huge_page(mm, address, hpage, node,
-				referenced, unmapped,
-				cc->enforce_pte_scan_limits);
+		collapse_huge_page(mm, address, hpage, node,
+				   scan_result.referenced, scan_result.unmapped,
+				   cc->enforce_pte_scan_limits);
 	}
-out:
-	trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
-				     none_or_zero, result, unmapped);
-	return ret;
+
+	trace_mm_khugepaged_scan_pmd(mm, scan_result.head, scan_result.writable,
+				     scan_result.referenced,
+				     scan_result.none_or_zero,
+				     scan_result.result, scan_result.unmapped);
+
+	return scan_result.result == SCAN_SUCCEED;
 }
 
 static void collect_mm_slot(struct mm_slot *mm_slot)
-- 
2.35.1.616.g0bdcbb4464-goog
