On Tue, Mar 8, 2022 at 1:35 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote: > > In madvise collapse context, we optionally want to be able to ignore > advice from MADV_NOHUGEPAGE-marked regions. Could you please elaborate on why this use case is valid? Typically MADV_NOHUGEPAGE is set when users really don't want to have THP for this area, so it doesn't make much sense to ignore it IMHO. > > Add a vm_flags_ignore argument to hugepage_vma_revalidate_pmd_count() > which can be used to ignore vm flags used when considering thp > eligibility. > > Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx> > --- > mm/khugepaged.c | 18 ++++++++++++------ > 1 file changed, 12 insertions(+), 6 deletions(-) > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 1d20be47bcea..ecbd3fc41c80 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -964,10 +964,14 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node) > #endif > > /* > - * Revalidate a vma's eligibility to collapse nr hugepages. > + * Revalidate a vma's eligibility to collapse nr hugepages. vm_flags_ignore > + * can be used to ignore certain vma_flags that would otherwise be checked - > + * the principal example being VM_NOHUGEPAGE which is ignored in madvise > + * collapse context. 
> */ > static int hugepage_vma_revalidate_pmd_count(struct mm_struct *mm, > unsigned long address, int nr, > + unsigned long vm_flags_ignore, > struct vm_area_struct **vmap) > { > struct vm_area_struct *vma; > @@ -986,7 +990,7 @@ static int hugepage_vma_revalidate_pmd_count(struct mm_struct *mm, > hend = vma->vm_end & HPAGE_PMD_MASK; > if (address < hstart || (address + nr * HPAGE_PMD_SIZE) > hend) > return SCAN_ADDRESS_RANGE; > - if (!hugepage_vma_check(vma, vma->vm_flags)) > + if (!hugepage_vma_check(vma, vma->vm_flags & ~vm_flags_ignore)) > return SCAN_VMA_CHECK; > /* Anon VMA expected */ > if (!vma->anon_vma || vma->vm_ops) > @@ -1000,9 +1004,11 @@ static int hugepage_vma_revalidate_pmd_count(struct mm_struct *mm, > */ > > static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, > + unsigned long vm_flags_ignore, > struct vm_area_struct **vmap) > { > - return hugepage_vma_revalidate_pmd_count(mm, address, 1, vmap); > + return hugepage_vma_revalidate_pmd_count(mm, address, 1, > + vm_flags_ignore, vmap); > } > > /* > @@ -1043,7 +1049,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, > /* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */ > if (ret & VM_FAULT_RETRY) { > mmap_read_lock(mm); > - if (hugepage_vma_revalidate(mm, haddr, &vma)) { > + if (hugepage_vma_revalidate(mm, haddr, VM_NONE, &vma)) { > /* vma is no longer available, don't continue to swapin */ > trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); > return false; > @@ -1200,7 +1206,7 @@ static void collapse_huge_page(struct mm_struct *mm, > count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); > > mmap_read_lock(mm); > - result = hugepage_vma_revalidate(mm, address, &vma); > + result = hugepage_vma_revalidate(mm, address, VM_NONE, &vma); > if (result) { > mmap_read_unlock(mm); > goto out_nolock; > @@ -1232,7 +1238,7 @@ static void collapse_huge_page(struct mm_struct *mm, > */ > mmap_write_lock(mm); > > - result = 
hugepage_vma_revalidate(mm, address, &vma); > + result = hugepage_vma_revalidate(mm, address, VM_NONE, &vma); > if (result) > goto out_up_write; > /* check if the pmd is still valid */ > -- > 2.35.1.616.g0bdcbb4464-goog >