When follow_pfn() hits a pud_huge() entry it won't return a valid PFN
for a PUD. Fix it by adding a @pudp parameter, thus allowing callers to
get the pud pointer. If we encounter such a huge page, we calculate the
offset within the PUD accordingly.

This allows KVM to handle 1G hugepage pfns on VM_PFNMAP vmas.

Co-developed-by: Nikita Leshenko <nikita.leshchenko@xxxxxxxxxx>
Signed-off-by: Nikita Leshenko <nikita.leshchenko@xxxxxxxxxx>
Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
---
 mm/memory.c | 58 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 9 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 109643219e1b..f46646630497 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4261,9 +4261,10 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+static int __follow_pte_pud(struct mm_struct *mm, unsigned long address,
 			    struct mmu_notifier_range *range,
-			    pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+			    pte_t **ptepp, pmd_t **pmdpp, pud_t **pudpp,
+			    spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -4280,6 +4281,28 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		goto out;
 
 	pud = pud_offset(p4d, address);
+	VM_BUG_ON(pud_trans_huge(*pud));
+
+	if (pud_huge(*pud)) {
+		if (!pudpp)
+			goto out;
+
+		if (range) {
+			mmu_notifier_range_init(range, MMU_NOTIFY_CLEAR, 0,
+						NULL, mm, address & PUD_MASK,
+						(address & PUD_MASK) + PUD_SIZE);
+			mmu_notifier_invalidate_range_start(range);
+		}
+		*ptlp = pud_lock(mm, pud);
+		if (pud_huge(*pud)) {
+			*pudpp = pud;
+			return 0;
+		}
+		spin_unlock(*ptlp);
+		if (range)
+			mmu_notifier_invalidate_range_end(range);
+	}
+
 	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
 		goto out;
 
@@ -4335,8 +4358,8 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
 
 	/* (void) is needed to make gcc happy */
 	(void) __cond_lock(*ptlp,
-			   !(res = __follow_pte_pmd(mm, address, NULL,
-						    ptepp, NULL, ptlp)));
+			   !(res = __follow_pte_pud(mm, address, NULL,
+						    ptepp, NULL, NULL, ptlp)));
 	return res;
 }
 
@@ -4348,12 +4371,26 @@ int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 
 	/* (void) is needed to make gcc happy */
 	(void) __cond_lock(*ptlp,
-			   !(res = __follow_pte_pmd(mm, address, range,
-						    ptepp, pmdpp, ptlp)));
+			   !(res = __follow_pte_pud(mm, address, range,
+						    ptepp, pmdpp, NULL, ptlp)));
 	return res;
 }
 EXPORT_SYMBOL(follow_pte_pmd);
 
+static int follow_pte_pud(struct mm_struct *mm, unsigned long address,
+			  struct mmu_notifier_range *range,
+			  pte_t **ptepp, pmd_t **pmdpp, pud_t **pudpp,
+			  spinlock_t **ptlp)
+{
+	int res;
+
+	/* (void) is needed to make gcc happy */
+	(void) __cond_lock(*ptlp,
+			   !(res = __follow_pte_pud(mm, address, range,
+						    ptepp, pmdpp, pudpp, ptlp)));
+	return res;
+}
+
 /**
  * follow_pfn - look up PFN at a user virtual address
  * @vma: memory mapping
@@ -4368,6 +4405,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn)
 {
 	pmd_t *pmdpp = NULL;
+	pud_t *pudpp = NULL;
 	int ret = -EINVAL;
 	spinlock_t *ptl;
 	pte_t *ptep;
@@ -4375,11 +4413,13 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 		return ret;
 
-	ret = follow_pte_pmd(vma->vm_mm, address, NULL,
-			     &ptep, &pmdpp, &ptl);
+	ret = follow_pte_pud(vma->vm_mm, address, NULL,
+			     &ptep, &pmdpp, &pudpp, &ptl);
 	if (ret)
 		return ret;
-	if (pmdpp)
+	if (pudpp)
+		*pfn = pud_pfn(*pudpp) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	else if (pmdpp)
 		*pfn = pmd_pfn(*pmdpp) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
 	else
 		*pfn = pte_pfn(*ptep);
-- 
2.17.1
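
For reviewers, a worked example of the PUD-level PFN arithmetic this
patch adds may help. The sketch below is illustrative only and not part
of the patch: it assumes x86-64 geometry (PAGE_SHIFT == 12,
PUD_SHIFT == 30), and base_pfn/address are made-up example values.

/*
 * Illustrative sketch, not part of the patch: the PFN computation that
 * follow_pfn() now performs when the address is covered by a 1G page.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PUD_SHIFT	30
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE - 1))

int main(void)
{
	unsigned long base_pfn = 0x40000;		/* stands in for pud_pfn(*pudpp) */
	unsigned long address  = 0x7f4003456000UL;	/* user virtual address */

	/* Base PFN of the 1G page plus the 4K page index of the address
	 * within the PUD-sized region -- same expression as in
	 * follow_pfn() above. */
	unsigned long pfn = base_pfn + ((address & ~PUD_MASK) >> PAGE_SHIFT);

	printf("pfn = 0x%lx\n", pfn);	/* prints pfn = 0x43456 */
	return 0;
}

It prints 0x43456, i.e. the 1G page's base PFN (0x40000) plus 0x3456,
the index of the 4K page that the address falls on within the 1G
region. The PMD branch does the same with PMD_MASK at 2M granularity.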