From: Zi Yan <ziy@xxxxxxxxxx> madvise can set the VM_HUGEPAGE_PUD bit via MADV_HUGEPAGE | MADV_HUGEPAGE_1GB and unset it via MADV_NOHUGEPAGE | MADV_HUGEPAGE_1GB. Later, the kernel will check this bit to decide whether to allocate PUD THPs on a VMA when the global PUD THP setting is madvise. Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> --- include/linux/mm.h | 6 ++++++ mm/khugepaged.c | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 51b75ffa6a6c..78bee63c64da 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -305,11 +305,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) +#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS @@ -325,6 +327,10 @@ extern unsigned int kobjsize(const void *objp); #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +#define VM_HUGEPAGE_PUD VM_HIGH_ARCH_5 +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ + #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ #elif defined(CONFIG_PPC) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b34c78085017..f085c218ea84 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -345,6 +345,9 @@ struct attribute_group khugepaged_attr_group = { int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { + /* only support 1GB PUD THP on x86 now 
*/ + bool use_pud_page = advice & MADV_HUGEPAGE_1GB; + advice = advice & MADV_BEHAVIOR_MASK; switch (advice) { case MADV_HUGEPAGE: @@ -359,6 +362,9 @@ int hugepage_madvise(struct vm_area_struct *vma, #endif *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; + + if (use_pud_page) + *vm_flags |= VM_HUGEPAGE_PUD; /* * If the vma become good for khugepaged to scan, * register it here without waiting a page fault that @@ -371,6 +377,9 @@ int hugepage_madvise(struct vm_area_struct *vma, case MADV_NOHUGEPAGE: *vm_flags &= ~VM_HUGEPAGE; *vm_flags |= VM_NOHUGEPAGE; + + if (use_pud_page) + *vm_flags &= ~VM_HUGEPAGE_PUD; /* * Setting VM_NOHUGEPAGE will prevent khugepaged from scanning * this vma even if we leave the mm registered in khugepaged if -- 2.28.0