Currently, the functions enabled by the ARCH_WANT_GENERAL_HUGETLB config option, such as 'huge_pte_alloc' and 'huge_pte_offset', don't take into account HugeTLB page implementations at the PGD level. This is also true for functions like 'follow_page_mask', which is called from the move_pages() system call. This lack of PGD-level huge page support prevents some architectures from using these generic HugeTLB functions. This change adds the required awareness of PGD-based implementations, so that more architectures, such as POWER, which implements 16GB pages at the PGD level along with 16MB pages at the PMD level, can now use the ARCH_WANT_GENERAL_HUGETLB config option. Signed-off-by: Anshuman Khandual <khandual@xxxxxxxxxxxxxxxxxx> --- include/linux/hugetlb.h | 3 +++ mm/gup.c | 6 ++++++ mm/hugetlb.c | 20 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d953c2..71832e1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -115,6 +115,8 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); +struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, + pgd_t *pgd, int flags); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pmd); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -143,6 +145,7 @@ static inline void hugetlb_show_meminfo(void) } #define follow_huge_pmd(mm, addr, pmd, flags) NULL #define follow_huge_pud(mm, addr, pud, flags) NULL +#define follow_huge_pgd(mm, addr, pgd, flags) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define pud_huge(x) 0 diff --git a/mm/gup.c b/mm/gup.c index fb87aea..9bac78c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -234,6 +234,12 @@ struct page *follow_page_mask(struct vm_area_struct *vma, pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) 
return no_page_table(vma, flags); + if (pgd_huge(*pgd) && vma->vm_flags & VM_HUGETLB) { + page = follow_huge_pgd(mm, address, pgd, flags); + if (page) + return page; + return no_page_table(vma, flags); + } pud = pud_offset(pgd, address); if (pud_none(*pud)) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 19d0d08..5ea3158 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4250,6 +4250,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *pte = NULL; pgd = pgd_offset(mm, addr); + if (sz == PGDIR_SIZE) { + pte = (pte_t *)pgd; + goto huge_pgd; + } + pud = pud_alloc(mm, pgd, addr); if (pud) { if (sz == PUD_SIZE) { @@ -4262,6 +4267,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte = (pte_t *)pmd_alloc(mm, pud, addr); } } + +huge_pgd: BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); return pte; @@ -4275,6 +4282,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgd = pgd_offset(mm, addr); if (pgd_present(*pgd)) { + if (pgd_huge(*pgd)) + return (pte_t *)pgd; pud = pud_offset(pgd, addr); if (pud_present(*pud)) { if (pud_huge(*pud)) @@ -4343,6 +4352,17 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); } +struct page * __weak +follow_huge_pgd(struct mm_struct *mm, unsigned long address, + pgd_t *pgd, int flags) +{ + if (flags & FOLL_GET) + return NULL; + + return pte_page(*(pte_t *)pgd) + + ((address & ~PGDIR_MASK) >> PAGE_SHIFT); +} + #ifdef CONFIG_MEMORY_FAILURE /* -- 2.1.0 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>