Add enforce_thp_enabled flag to struct collapse_control that allows a collapse context to ignore constraints imposed by /sys/kernel/mm/transparent_hugepage/enabled. This flag is set in khugepaged collapse context to preserve existing khugepaged behavior. This flag will be used (unset) when introducing madvise collapse context since the desired THP semantics of MADV_COLLAPSE aren't coupled to sysfs THP settings. Most notably, for the purpose of eventual madvise_collapse(2) support, this allows userspace to trigger THP collapse on behalf of another process, without adding support to meddle with the VMA flags of said process, or change sysfs THP settings. For now, limit this flag to /sys/kernel/mm/transparent_hugepage/enabled, but it can be expanded to include /sys/kernel/mm/transparent_hugepage/shmem_enabled later. Link: https://lore.kernel.org/linux-mm/CAAa6QmQxay1_=Pmt8oCX2-Va18t44FV-Vs-WsQt_6+qBks4nZA@xxxxxxxxxxxxxx/ Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx> --- mm/khugepaged.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c3589b3e238d..4ad04f552347 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -94,6 +94,11 @@ struct collapse_control { */ bool enforce_page_heuristics; + /* Enforce constraints of + * /sys/kernel/mm/transparent_hugepage/enabled + */ + bool enforce_thp_enabled; + /* Num pages scanned per node */ int node_load[MAX_NUMNODES]; @@ -893,10 +898,12 @@ static bool khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node) */ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, - struct vm_area_struct **vmap) + struct vm_area_struct **vmap, + struct collapse_control *cc) { struct vm_area_struct *vma; unsigned long hstart, hend; + unsigned long vma_flags; if (unlikely(khugepaged_test_exit(mm))) return SCAN_ANY_PROCESS; @@ -909,7 +916,18 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, hend = vma->vm_end 
& HPAGE_PMD_MASK; if (address < hstart || address + HPAGE_PMD_SIZE > hend) return SCAN_ADDRESS_RANGE; - if (!hugepage_vma_check(vma, vma->vm_flags)) + + /* + * If !cc->enforce_thp_enabled, set VM_HUGEPAGE so that + * hugepage_vma_check() can pass even if + * TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG is set (i.e. "madvise" mode). + * Note that hugepage_vma_check() doesn't enforce that + * TRANSPARENT_HUGEPAGE_FLAG or TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG + * must be set (i.e. "never" mode). + */ + vma_flags = cc->enforce_thp_enabled ? vma->vm_flags + : vma->vm_flags | VM_HUGEPAGE; + if (!hugepage_vma_check(vma, vma_flags)) return SCAN_VMA_CHECK; /* Anon VMA expected */ if (!vma->anon_vma || !vma_is_anonymous(vma)) @@ -953,7 +971,8 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm, static bool __collapse_huge_page_swapin(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, - int referenced) + int referenced, + struct collapse_control *cc) { int swapped_in = 0; vm_fault_t ret = 0; @@ -980,7 +999,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, /* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */ if (ret & VM_FAULT_RETRY) { mmap_read_lock(mm); - if (hugepage_vma_revalidate(mm, haddr, &vma)) { + if (hugepage_vma_revalidate(mm, haddr, &vma, cc)) { /* vma is no longer available, don't continue to swapin */ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; @@ -1047,7 +1066,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, goto out_nolock; mmap_read_lock(mm); - result = hugepage_vma_revalidate(mm, address, &vma); + result = hugepage_vma_revalidate(mm, address, &vma, cc); if (result) { mmap_read_unlock(mm); goto out_nolock; @@ -1066,7 +1085,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, * Continuing to collapse causes inconsistency. 
*/ if (unmapped && !__collapse_huge_page_swapin(mm, vma, address, - pmd, referenced)) { + pmd, referenced, cc)) { mmap_read_unlock(mm); goto out_nolock; } @@ -1078,7 +1097,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, * handled by the anon_vma lock + PG_lock. */ mmap_write_lock(mm); - result = hugepage_vma_revalidate(mm, address, &vma); + result = hugepage_vma_revalidate(mm, address, &vma, cc); if (result) goto out_up_write; /* check if the pmd is still valid */ @@ -2277,6 +2296,7 @@ static int khugepaged(void *none) struct mm_slot *mm_slot; struct collapse_control cc = { .enforce_page_heuristics = true, + .enforce_thp_enabled = true, .last_target_node = NUMA_NO_NODE, /* .gfp set later */ }; -- 2.36.1.255.ge46751e96f-goog