From: Zi Yan <ziy@xxxxxxxxxx>

Like the existing global PMD THP knob, the new enabled_pud_thp knob
allows users to enable or disable PUD THPs. PUD THP is disabled by
default and should only be enabled by users who understand its
performance tradeoffs, such as longer first-time page faults due to
larger page zeroing and longer page allocation times when memory is
fragmented. Experienced users can enable it to benefit from fewer page
faults and fewer TLB misses.

* always means PUD THPs will be allocated on all VMAs if possible.
* madvise means PUD THPs will be allocated if vm_flags has
  VM_HUGEPAGE_PUD set via the madvise syscall using
  MADV_HUGEPAGE | MADV_HUGEPAGE_PUD.
* never means PUD THPs will not be allocated on any VMA.
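As an illustration only (not part of this patch), a minimal user-space
sketch of the madvise path above could look as follows. It assumes
MADV_HUGEPAGE_PUD is exported to user space by another patch in this
series, that enabled_pud_thp is set to "madvise", and that a PUD THP is
1GB (x86_64); the over-allocation is only there to obtain a PUD-aligned
region.

	/*
	 * Hypothetical usage sketch: request PUD THPs on an anonymous
	 * mapping when /sys/kernel/mm/transparent_hugepage/enabled_pud_thp
	 * is set to "madvise", e.g.:
	 *	echo madvise > /sys/kernel/mm/transparent_hugepage/enabled_pud_thp
	 */
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	#define PUD_THP_SIZE	(1UL << 30)	/* 1GB PUD THP on x86_64 */

	int main(void)
	{
		/* Over-allocate so a PUD-aligned 1GB region can be carved out. */
		void *raw = mmap(NULL, 2 * PUD_THP_SIZE, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		char *buf;

		if (raw == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		buf = (char *)(((unsigned long)raw + PUD_THP_SIZE - 1) &
			       ~(PUD_THP_SIZE - 1));

		/* Sets VM_HUGEPAGE and VM_HUGEPAGE_PUD on the VMA. */
		if (madvise(buf, PUD_THP_SIZE, MADV_HUGEPAGE | MADV_HUGEPAGE_PUD))
			perror("madvise");

		/* The first write fault may now be served by a single PUD THP. */
		memset(buf, 1, PUD_THP_SIZE);

		munmap(raw, 2 * PUD_THP_SIZE);
		return 0;
	}
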
Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/huge_mm.h | 14 ++++++++++++++
 mm/huge_memory.c        | 38 ++++++++++++++++++++++++++++++++++++++
 mm/memory.c             |  2 +-
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c7bc40c4a5e2..0d0f9cf25aeb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -119,6 +119,8 @@ enum transparent_hugepage_flag {
 #ifdef CONFIG_DEBUG_VM
 	TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
 #endif
+	TRANSPARENT_PUD_HUGEPAGE_FLAG,
+	TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG,
 };
 
 struct kobject;
@@ -184,6 +186,18 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 }
 
 bool transparent_hugepage_enabled(struct vm_area_struct *vma);
+static inline bool transparent_pud_hugepage_enabled(struct vm_area_struct *vma)
+{
+	if (transparent_hugepage_enabled(vma)) {
+		if (transparent_hugepage_flags & (1 << TRANSPARENT_PUD_HUGEPAGE_FLAG))
+			return true;
+		if (transparent_hugepage_flags &
+				(1 << TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG))
+			return !!(vma->vm_flags & VM_HUGEPAGE_PUD);
+	}
+
+	return false;
+}
 
 #define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 61ae7a0ded84..1965753b31a2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -199,6 +199,43 @@ static ssize_t enabled_store(struct kobject *kobj,
 static struct kobj_attribute enabled_attr =
 	__ATTR(enabled, 0644, enabled_show, enabled_store);
 
+static ssize_t enabled_pud_thp_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	if (test_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags))
+		return sprintf(buf, "[always] madvise never\n");
+	else if (test_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags))
+		return sprintf(buf, "always [madvise] never\n");
+	else
+		return sprintf(buf, "always madvise [never]\n");
+}
+
+static ssize_t enabled_pud_thp_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	ssize_t ret = count;
+
+	if (!memcmp("always", buf,
+		    min(sizeof("always")-1, count))) {
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+	} else if (!memcmp("madvise", buf,
+			   min(sizeof("madvise")-1, count))) {
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+	} else if (!memcmp("never", buf,
+			   min(sizeof("never")-1, count))) {
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_PUD_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+	} else
+		ret = -EINVAL;
+
+	return ret;
+}
+static struct kobj_attribute enabled_pud_thp_attr =
+	__ATTR(enabled_pud_thp, 0644, enabled_pud_thp_show, enabled_pud_thp_store);
+
 ssize_t single_hugepage_flag_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf,
 				  enum transparent_hugepage_flag flag)
@@ -305,6 +342,7 @@ static struct kobj_attribute hpage_pmd_size_attr =
 
 static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
+	&enabled_pud_thp_attr.attr,
 	&defrag_attr.attr,
 	&use_zero_page_attr.attr,
 	&hpage_pmd_size_attr.attr,
diff --git a/mm/memory.c b/mm/memory.c
index ab80d13807aa..9f7b509a3aa7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4282,7 +4282,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 	if (!vmf.pud)
 		return VM_FAULT_OOM;
 retry_pud:
-	if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
+	if (pud_none(*vmf.pud) && transparent_pud_hugepage_enabled(vma)) {
 		ret = create_huge_pud(&vmf);
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;
-- 
2.28.0