+ mm-khugepaged-add-struct-collapse_control.patch added to mm-unstable branch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm/khugepaged: add struct collapse_control
has been added to the -mm mm-unstable branch.  Its filename is
     mm-khugepaged-add-struct-collapse_control.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-khugepaged-add-struct-collapse_control.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: "Zach O'Keefe" <zokeefe@xxxxxxxxxx>
Subject: mm/khugepaged: add struct collapse_control
Date: Wed, 6 Jul 2022 16:59:21 -0700

Modularize hugepage collapse by introducing struct collapse_control.  This
structure serves to describe the properties of the requested collapse, as
well as serve as a local scratch pad to use during the collapse itself.

Start by moving global per-node khugepaged statistics into this new
structure.  Note that this structure is still statically allocated since
CONFIG_NODES_SHIFT might be arbitrary large, and stack-allocating a
MAX_NUMNODES-sized array could cause -Wframe-large-than= errors.

Link: https://lkml.kernel.org/r/20220706235936.2197195-4-zokeefe@xxxxxxxxxx
Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>
Cc: Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
Cc: Chris Kennelly <ckennelly@xxxxxxxxxx>
Cc: Chris Zankel <chris@xxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Helge Deller <deller@xxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Ivan Kokshaysky <ink@xxxxxxxxxxxxxxxxxxxx>
Cc: James Bottomley <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Matt Turner <mattst88@xxxxxxxxx>
Cc: Max Filippov <jcmvbkbc@xxxxxxxxx>
Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx>
Cc: Pavel Begunkov <asml.silence@xxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Rongwei Wang <rongwei.wang@xxxxxxxxxxxxxxxxx>
Cc: SeongJae Park <sj@xxxxxxxxxx>
Cc: Song Liu <songliubraving@xxxxxx>
Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Yang Shi <shy828301@xxxxxxxxx>
Cc: Zi Yan <ziy@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/khugepaged.c |   87 ++++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 37 deletions(-)

--- a/mm/khugepaged.c~mm-khugepaged-add-struct-collapse_control
+++ a/mm/khugepaged.c
@@ -85,6 +85,14 @@ static struct kmem_cache *mm_slot_cache
 
 #define MAX_PTE_MAPPED_THP 8
 
+struct collapse_control {
+	/* Num pages scanned per node */
+	int node_load[MAX_NUMNODES];
+
+	/* Last target selected in khugepaged_find_target_node() */
+	int last_target_node;
+};
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -735,9 +743,12 @@ static void khugepaged_alloc_sleep(void)
 	remove_wait_queue(&khugepaged_wait, &wait);
 }
 
-static int khugepaged_node_load[MAX_NUMNODES];
 
-static bool khugepaged_scan_abort(int nid)
+struct collapse_control khugepaged_collapse_control = {
+	.last_target_node = NUMA_NO_NODE,
+};
+
+static bool khugepaged_scan_abort(int nid, struct collapse_control *cc)
 {
 	int i;
 
@@ -749,11 +760,11 @@ static bool khugepaged_scan_abort(int ni
 		return false;
 
 	/* If there is a count for this node already, it must be acceptable */
-	if (khugepaged_node_load[nid])
+	if (cc->node_load[nid])
 		return false;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (!khugepaged_node_load[i])
+		if (!cc->node_load[i])
 			continue;
 		if (node_distance(nid, i) > node_reclaim_distance)
 			return true;
@@ -772,32 +783,31 @@ static inline gfp_t alloc_hugepage_khuge
 }
 
 #ifdef CONFIG_NUMA
-static int khugepaged_find_target_node(void)
+static int khugepaged_find_target_node(struct collapse_control *cc)
 {
-	static int last_khugepaged_target_node = NUMA_NO_NODE;
 	int nid, target_node = 0, max_value = 0;
 
 	/* find first node with max normal pages hit */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
-		if (khugepaged_node_load[nid] > max_value) {
-			max_value = khugepaged_node_load[nid];
+		if (cc->node_load[nid] > max_value) {
+			max_value = cc->node_load[nid];
 			target_node = nid;
 		}
 
 	/* do some balance if several nodes have the same hit record */
-	if (target_node <= last_khugepaged_target_node)
-		for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
-				nid++)
-			if (max_value == khugepaged_node_load[nid]) {
+	if (target_node <= cc->last_target_node)
+		for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
+		     nid++)
+			if (max_value == cc->node_load[nid]) {
 				target_node = nid;
 				break;
 			}
 
-	last_khugepaged_target_node = target_node;
+	cc->last_target_node = target_node;
 	return target_node;
 }
 #else
-static int khugepaged_find_target_node(void)
+static int khugepaged_find_target_node(struct collapse_control *cc)
 {
 	return 0;
 }
@@ -1075,10 +1085,9 @@ out_nolock:
 	return;
 }
 
-static int khugepaged_scan_pmd(struct mm_struct *mm,
-			       struct vm_area_struct *vma,
-			       unsigned long address,
-			       struct page **hpage)
+static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+			       unsigned long address, struct page **hpage,
+			       struct collapse_control *cc)
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
@@ -1098,7 +1107,7 @@ static int khugepaged_scan_pmd(struct mm
 		goto out;
 	}
 
-	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
+	memset(cc->node_load, 0, sizeof(cc->node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR;
 	     _pte++, _address += PAGE_SIZE) {
@@ -1164,16 +1173,16 @@ static int khugepaged_scan_pmd(struct mm
 
 		/*
 		 * Record which node the original page is from and save this
-		 * information to khugepaged_node_load[].
+		 * information to cc->node_load[].
 		 * Khugepaged will allocate hugepage from the node has the max
 		 * hit record.
 		 */
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node)) {
+		if (khugepaged_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			goto out_unmap;
 		}
-		khugepaged_node_load[node]++;
+		cc->node_load[node]++;
 		if (!PageLRU(page)) {
 			result = SCAN_PAGE_LRU;
 			goto out_unmap;
@@ -1224,7 +1233,7 @@ static int khugepaged_scan_pmd(struct mm
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret) {
-		node = khugepaged_find_target_node();
+		node = khugepaged_find_target_node(cc);
 		/* collapse_huge_page will return with the mmap_lock released */
 		collapse_huge_page(mm, address, hpage, node,
 				referenced, unmapped);
@@ -1879,8 +1888,9 @@ out:
 	/* TODO: tracepoints */
 }
 
-static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
+				 pgoff_t start, struct page **hpage,
+				 struct collapse_control *cc)
 {
 	struct page *page = NULL;
 	struct address_space *mapping = file->f_mapping;
@@ -1891,7 +1901,7 @@ static void khugepaged_scan_file(struct
 
 	present = 0;
 	swap = 0;
-	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
+	memset(cc->node_load, 0, sizeof(cc->node_load));
 	rcu_read_lock();
 	xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
 		if (xas_retry(&xas, page))
@@ -1916,11 +1926,11 @@ static void khugepaged_scan_file(struct
 		}
 
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node)) {
+		if (khugepaged_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			break;
 		}
-		khugepaged_node_load[node]++;
+		cc->node_load[node]++;
 
 		if (!PageLRU(page)) {
 			result = SCAN_PAGE_LRU;
@@ -1953,7 +1963,7 @@ static void khugepaged_scan_file(struct
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
-			node = khugepaged_find_target_node();
+			node = khugepaged_find_target_node(cc);
 			collapse_file(mm, file, start, hpage, node);
 		}
 	}
@@ -1961,8 +1971,9 @@ static void khugepaged_scan_file(struct
 	/* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
+				 pgoff_t start, struct page **hpage,
+				 struct collapse_control *cc)
 {
 	BUILD_BUG();
 }
@@ -1973,7 +1984,8 @@ static void khugepaged_collapse_pte_mapp
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
-					    struct page **hpage)
+					    struct page **hpage,
+					    struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
 	__acquires(&khugepaged_mm_lock)
 {
@@ -2050,12 +2062,13 @@ skip:
 
 				mmap_read_unlock(mm);
 				ret = 1;
-				khugepaged_scan_file(mm, file, pgoff, hpage);
+				khugepaged_scan_file(mm, file, pgoff, hpage,
+						     cc);
 				fput(file);
 			} else {
 				ret = khugepaged_scan_pmd(mm, vma,
 						khugepaged_scan.address,
-						hpage);
+						hpage, cc);
 			}
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
@@ -2111,7 +2124,7 @@ static int khugepaged_wait_event(void)
 		kthread_should_stop();
 }
 
-static void khugepaged_do_scan(void)
+static void khugepaged_do_scan(struct collapse_control *cc)
 {
 	struct page *hpage = NULL;
 	unsigned int progress = 0, pass_through_head = 0;
@@ -2132,7 +2145,7 @@ static void khugepaged_do_scan(void)
 		if (khugepaged_has_work() &&
 		    pass_through_head < 2)
 			progress += khugepaged_scan_mm_slot(pages - progress,
-							    &hpage);
+							    &hpage, cc);
 		else
 			progress = pages;
 		spin_unlock(&khugepaged_mm_lock);
@@ -2188,7 +2201,7 @@ static int khugepaged(void *none)
 	set_user_nice(current, MAX_NICE);
 
 	while (!kthread_should_stop()) {
-		khugepaged_do_scan();
+		khugepaged_do_scan(&khugepaged_collapse_control);
 		khugepaged_wait_work();
 	}
 
_

Patches currently in -mm which might be from zokeefe@xxxxxxxxxx are

mm-khugepaged-remove-redundant-transhuge_vma_suitable-check.patch
mm-khugepaged-add-struct-collapse_control.patch
mm-khugepaged-dedup-and-simplify-hugepage-alloc-and-charging.patch
mm-khugepaged-pipe-enum-scan_result-codes-back-to-callers.patch
mm-khugepaged-add-flag-to-predicate-khugepaged-only-behavior.patch
mm-thp-add-flag-to-enforce-sysfs-thp-in-hugepage_vma_check.patch
mm-khugepaged-record-scan_pmd_mapped-when-scan_pmd-finds-hugepage.patch
mm-madvise-introduce-madv_collapse-sync-hugepage-collapse.patch
mm-khugepaged-rename-prefix-of-shared-collapse-functions.patch
mm-madvise-add-huge_memory-mm_madvise_collapse-tracepoint.patch
mm-madvise-add-madv_collapse-to-process_madvise.patch
proc-smaps-add-pmdmappable-field-to-smaps.patch
selftests-vm-modularize-collapse-selftests.patch
selftests-vm-dedup-hugepage-allocation-logic.patch
selftests-vm-add-madv_collapse-collapse-context-to-selftests.patch
selftests-vm-add-selftest-to-verify-recollapse-of-thps.patch
selftests-vm-add-selftest-to-verify-multi-thp-collapse.patch




[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux