[PATCH v6 08/15] mm/khugepaged: add flag to ignore THP sysfs enabled

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add enforce_thp_enabled flag to struct collapse_control that allows context
to ignore constraints imposed by /sys/kernel/transparent_hugepage/enabled.

This flag is set in khugepaged collapse context to preserve existing
khugepaged behavior.

This flag will be used (unset) when introducing madvise collapse
context since the desired THP semantics of MADV_COLLAPSE aren't coupled
to sysfs THP settings.  Most notably, for the purpose of eventual
madvise_collapse(2) support, this allows userspace to trigger THP collapse
on behalf of another process, without adding support to meddle with
the VMA flags of said process, or change sysfs THP settings.

For now, limit this flag to /sys/kernel/transparent_hugepage/enabled,
but it can be expanded to include
/sys/kernel/transparent_hugepage/shmem_enabled later.

Link: https://lore.kernel.org/linux-mm/CAAa6QmQxay1_=Pmt8oCX2-Va18t44FV-Vs-WsQt_6+qBks4nZA@xxxxxxxxxxxxxx/

Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>
---
 mm/khugepaged.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c3589b3e238d..4ad04f552347 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -94,6 +94,11 @@ struct collapse_control {
 	 */
 	bool enforce_page_heuristics;
 
+	/* Enforce constraints of
+	 * /sys/kernel/mm/transparent_hugepage/enabled
+	 */
+	bool enforce_thp_enabled;
+
 	/* Num pages scanned per node */
 	int node_load[MAX_NUMNODES];
 
@@ -893,10 +898,12 @@ static bool khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
  */
 
 static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
-		struct vm_area_struct **vmap)
+				   struct vm_area_struct **vmap,
+				   struct collapse_control *cc)
 {
 	struct vm_area_struct *vma;
 	unsigned long hstart, hend;
+	unsigned long vma_flags;
 
 	if (unlikely(khugepaged_test_exit(mm)))
 		return SCAN_ANY_PROCESS;
@@ -909,7 +916,18 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (address < hstart || address + HPAGE_PMD_SIZE > hend)
 		return SCAN_ADDRESS_RANGE;
-	if (!hugepage_vma_check(vma, vma->vm_flags))
+
+	/*
+	 * If !cc->enforce_thp_enabled, set VM_HUGEPAGE so that
+	 * hugepage_vma_check() can pass even if
+	 * TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG is set (i.e. "madvise" mode).
+	 * Note that hugepage_vma_check() doesn't enforce that
+	 * TRANSPARENT_HUGEPAGE_FLAG or TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG
+	 * must be set (i.e. "never" mode).
+	 */
+	vma_flags = cc->enforce_thp_enabled ?  vma->vm_flags
+			: vma->vm_flags | VM_HUGEPAGE;
+	if (!hugepage_vma_check(vma, vma_flags))
 		return SCAN_VMA_CHECK;
 	/* Anon VMA expected */
 	if (!vma->anon_vma || !vma_is_anonymous(vma))
@@ -953,7 +971,8 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
 static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long haddr, pmd_t *pmd,
-					int referenced)
+					int referenced,
+					struct collapse_control *cc)
 {
 	int swapped_in = 0;
 	vm_fault_t ret = 0;
@@ -980,7 +999,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
 		if (ret & VM_FAULT_RETRY) {
 			mmap_read_lock(mm);
-			if (hugepage_vma_revalidate(mm, haddr, &vma)) {
+			if (hugepage_vma_revalidate(mm, haddr, &vma, cc)) {
 				/* vma is no longer available, don't continue to swapin */
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
@@ -1047,7 +1066,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 		goto out_nolock;
 
 	mmap_read_lock(mm);
-	result = hugepage_vma_revalidate(mm, address, &vma);
+	result = hugepage_vma_revalidate(mm, address, &vma, cc);
 	if (result) {
 		mmap_read_unlock(mm);
 		goto out_nolock;
@@ -1066,7 +1085,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	 * Continuing to collapse causes inconsistency.
 	 */
 	if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
-						     pmd, referenced)) {
+						     pmd, referenced, cc)) {
 		mmap_read_unlock(mm);
 		goto out_nolock;
 	}
@@ -1078,7 +1097,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	 * handled by the anon_vma lock + PG_lock.
 	 */
 	mmap_write_lock(mm);
-	result = hugepage_vma_revalidate(mm, address, &vma);
+	result = hugepage_vma_revalidate(mm, address, &vma, cc);
 	if (result)
 		goto out_up_write;
 	/* check if the pmd is still valid */
@@ -2277,6 +2296,7 @@ static int khugepaged(void *none)
 	struct mm_slot *mm_slot;
 	struct collapse_control cc = {
 		.enforce_page_heuristics = true,
+		.enforce_thp_enabled = true,
 		.last_target_node = NUMA_NO_NODE,
 		/* .gfp set later  */
 	};
-- 
2.36.1.255.ge46751e96f-goog





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux