Re: [PATCH v6 08/15] mm/khugepaged: add flag to ignore THP sysfs enabled

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jun 3, 2022 at 5:40 PM Zach O'Keefe <zokeefe@xxxxxxxxxx> wrote:
>
> Add enforce_thp_enabled flag to struct collapse_control that allows context
> to ignore constraints imposed by /sys/kernel/mm/transparent_hugepage/enabled.
>
> This flag is set in khugepaged collapse context to preserve existing
> khugepaged behavior.
>
> This flag will be used (unset) when introducing madvise collapse
> context since the desired THP semantics of MADV_COLLAPSE aren't coupled
> to sysfs THP settings.  Most notably, for the purpose of eventual
> madvise_collapse(2) support, this allows userspace to trigger THP collapse
> on behalf of another process, without adding support to meddle with
> the VMA flags of said process, or change sysfs THP settings.
>
> For now, limit this flag to /sys/kernel/mm/transparent_hugepage/enabled,
> but it can be expanded to include
> /sys/kernel/mm/transparent_hugepage/shmem_enabled later.
>
> Link: https://lore.kernel.org/linux-mm/CAAa6QmQxay1_=Pmt8oCX2-Va18t44FV-Vs-WsQt_6+qBks4nZA@xxxxxxxxxxxxxx/
>
> Signed-off-by: Zach O'Keefe <zokeefe@xxxxxxxxxx>

Looks good to me. Reviewed-by: Yang Shi <shy828301@xxxxxxxxx>

Just a reminder, I just posted series
https://lore.kernel.org/linux-mm/20220606214414.736109-1-shy828301@xxxxxxxxx/T/#m5dae2dfa4b247f3b3903951dd3a1f0978a927e16,
it changed some logic in hugepage_vma_check(). If your series gets in
after it, you may need some additional tweaks to disregard the sysfs THP
setting.

> ---
>  mm/khugepaged.c | 34 +++++++++++++++++++++++++++-------
>  1 file changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index c3589b3e238d..4ad04f552347 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -94,6 +94,11 @@ struct collapse_control {
>          */
>         bool enforce_page_heuristics;
>
> +       /* Enforce constraints of
> +        * /sys/kernel/mm/transparent_hugepage/enabled
> +        */
> +       bool enforce_thp_enabled;
> +
>         /* Num pages scanned per node */
>         int node_load[MAX_NUMNODES];
>
> @@ -893,10 +898,12 @@ static bool khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
>   */
>
>  static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
> -               struct vm_area_struct **vmap)
> +                                  struct vm_area_struct **vmap,
> +                                  struct collapse_control *cc)
>  {
>         struct vm_area_struct *vma;
>         unsigned long hstart, hend;
> +       unsigned long vma_flags;
>
>         if (unlikely(khugepaged_test_exit(mm)))
>                 return SCAN_ANY_PROCESS;
> @@ -909,7 +916,18 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>         hend = vma->vm_end & HPAGE_PMD_MASK;
>         if (address < hstart || address + HPAGE_PMD_SIZE > hend)
>                 return SCAN_ADDRESS_RANGE;
> -       if (!hugepage_vma_check(vma, vma->vm_flags))
> +
> +       /*
> +        * If !cc->enforce_thp_enabled, set VM_HUGEPAGE so that
> +        * hugepage_vma_check() can pass even if
> +        * TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG is set (i.e. "madvise" mode).
> +        * Note that hugepage_vma_check() doesn't enforce that
> +        * TRANSPARENT_HUGEPAGE_FLAG or TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG
> +        * must be set (i.e. "never" mode).
> +        */
> +       vma_flags = cc->enforce_thp_enabled ?  vma->vm_flags
> +                       : vma->vm_flags | VM_HUGEPAGE;
> +       if (!hugepage_vma_check(vma, vma_flags))
>                 return SCAN_VMA_CHECK;
>         /* Anon VMA expected */
>         if (!vma->anon_vma || !vma_is_anonymous(vma))
> @@ -953,7 +971,8 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
>  static bool __collapse_huge_page_swapin(struct mm_struct *mm,
>                                         struct vm_area_struct *vma,
>                                         unsigned long haddr, pmd_t *pmd,
> -                                       int referenced)
> +                                       int referenced,
> +                                       struct collapse_control *cc)
>  {
>         int swapped_in = 0;
>         vm_fault_t ret = 0;
> @@ -980,7 +999,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
>                 /* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
>                 if (ret & VM_FAULT_RETRY) {
>                         mmap_read_lock(mm);
> -                       if (hugepage_vma_revalidate(mm, haddr, &vma)) {
> +                       if (hugepage_vma_revalidate(mm, haddr, &vma, cc)) {
>                                 /* vma is no longer available, don't continue to swapin */
>                                 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
>                                 return false;
> @@ -1047,7 +1066,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>                 goto out_nolock;
>
>         mmap_read_lock(mm);
> -       result = hugepage_vma_revalidate(mm, address, &vma);
> +       result = hugepage_vma_revalidate(mm, address, &vma, cc);
>         if (result) {
>                 mmap_read_unlock(mm);
>                 goto out_nolock;
> @@ -1066,7 +1085,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>          * Continuing to collapse causes inconsistency.
>          */
>         if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
> -                                                    pmd, referenced)) {
> +                                                    pmd, referenced, cc)) {
>                 mmap_read_unlock(mm);
>                 goto out_nolock;
>         }
> @@ -1078,7 +1097,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>          * handled by the anon_vma lock + PG_lock.
>          */
>         mmap_write_lock(mm);
> -       result = hugepage_vma_revalidate(mm, address, &vma);
> +       result = hugepage_vma_revalidate(mm, address, &vma, cc);
>         if (result)
>                 goto out_up_write;
>         /* check if the pmd is still valid */
> @@ -2277,6 +2296,7 @@ static int khugepaged(void *none)
>         struct mm_slot *mm_slot;
>         struct collapse_control cc = {
>                 .enforce_page_heuristics = true,
> +               .enforce_thp_enabled = true,
>                 .last_target_node = NUMA_NO_NODE,
>                 /* .gfp set later  */
>         };
> --
> 2.36.1.255.ge46751e96f-goog
>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux