Although this change is large, it is somewhat straightforward. Before,
all users of walk_hugetlb_range could get the size of the PTE just by
checking the hmask or the mm_walk struct. With HGM, that information is
held in the hugetlb_pte struct, so we provide that instead of the raw
pte_t*.

Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
---
 arch/s390/mm/gmap.c      |  8 ++++++--
 fs/proc/task_mmu.c       | 35 +++++++++++++++++++----------------
 include/linux/pagewalk.h |  3 ++-
 mm/damon/vaddr.c         | 34 ++++++++++++++++++----------------
 mm/hmm.c                 |  7 ++++---
 mm/mempolicy.c           | 11 ++++++++---
 mm/mincore.c             |  4 ++--
 mm/mprotect.c            |  6 +++---
 mm/pagewalk.c            | 18 ++++++++++++++++--
 9 files changed, 78 insertions(+), 48 deletions(-)
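A note for reviewers: the hugetlb_pte type and its helpers used below
are introduced earlier in this series. As a rough sketch of the shape
this patch relies on (everything here is inferred from the call sites
in this patch, not copied from the series, so treat it as illustrative
rather than authoritative):

	/*
	 * Illustrative sketch only; the real definitions live in
	 * <linux/hugetlb.h> earlier in the series and may differ.
	 */
	struct hugetlb_pte {
		pte_t *ptep;		/* the page table entry itself */
		unsigned int shift;	/* log2 of the size this entry maps */
	};

	/* Size mapped by one entry: 1G, 2M, or, with HGM, as small as 4K. */
	static inline unsigned long hugetlb_pte_size(const struct hugetlb_pte *hpte)
	{
		return 1UL << hpte->shift;
	}

	/* Plays the role the old 'hmask' callback argument played. */
	static inline unsigned long hugetlb_pte_mask(const struct hugetlb_pte *hpte)
	{
		return ~(hugetlb_pte_size(hpte) - 1);
	}

	/* Read the entry; like huge_ptep_get(), but takes the wrapper. */
	static inline pte_t hugetlb_ptep_get(const struct hugetlb_pte *hpte)
	{
		return huge_ptep_get(hpte->ptep);
	}

The key consequence for callers: with HGM, hugetlb_pte_size() can be
smaller than huge_page_size(hstate_vma(vma)), so callbacks must not
assume the two are equal. queue_pages_hugetlb() below bails out for
exactly that reason.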
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b8ae4a4aa2ba..518cebfd72cd 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2620,10 +2620,14 @@ static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
-static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
-				      unsigned long hmask, unsigned long next,
+static int __s390_enable_skey_hugetlb(struct hugetlb_pte *hpte,
+				      unsigned long addr, unsigned long next,
 				      struct mm_walk *walk)
 {
-	pmd_t *pmd = (pmd_t *)pte;
+	if (!hugetlb_pte_present_leaf(hpte) ||
+	    hugetlb_pte_size(hpte) != PMD_SIZE)
+		return -EINVAL;
+
+	pmd_t *pmd = (pmd_t *)hpte->ptep;
 	unsigned long start, end;
 	struct page *page = pmd_page(*pmd);
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d04e3470d4c..b2d683f99fa9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -714,18 +714,19 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
+static int smaps_hugetlb_range(struct hugetlb_pte *hpte,
 				 unsigned long addr, unsigned long end,
 				 struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = walk->vma;
 	struct page *page = NULL;
+	pte_t pte = hugetlb_ptep_get(hpte);
 
-	if (pte_present(*pte)) {
-		page = vm_normal_page(vma, addr, *pte);
-	} else if (is_swap_pte(*pte)) {
-		swp_entry_t swpent = pte_to_swp_entry(*pte);
+	if (hugetlb_pte_present_leaf(hpte)) {
+		page = vm_normal_page(vma, addr, pte);
+	} else if (is_swap_pte(pte)) {
+		swp_entry_t swpent = pte_to_swp_entry(pte);
 
 		if (is_pfn_swap_entry(swpent))
 			page = pfn_swap_entry_to_page(swpent);
@@ -734,9 +735,9 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 		int mapcount = page_mapcount(page);
 
 		if (mapcount >= 2)
-			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
+			mss->shared_hugetlb += hugetlb_pte_size(hpte);
 		else
-			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+			mss->private_hugetlb += hugetlb_pte_size(hpte);
 	}
 	return 0;
 }
@@ -1535,7 +1536,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 
 #ifdef CONFIG_HUGETLB_PAGE
 /* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
+static int pagemap_hugetlb_range(struct hugetlb_pte *hpte,
 				 unsigned long addr, unsigned long end,
 				 struct mm_walk *walk)
 {
@@ -1543,13 +1544,13 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 	struct pagemapread *pm = walk->private;
 	struct vm_area_struct *vma = walk->vma;
 	u64 flags = 0, frame = 0;
 	int err = 0;
-	pte_t pte;
+	unsigned long hmask = hugetlb_pte_mask(hpte);
 
 	if (vma->vm_flags & VM_SOFTDIRTY)
 		flags |= PM_SOFT_DIRTY;
 
-	pte = huge_ptep_get(ptep);
-	if (pte_present(pte)) {
+	if (hugetlb_pte_present_leaf(hpte)) {
+		pte_t pte = hugetlb_ptep_get(hpte);
 		struct page *page = pte_page(pte);
 
@@ -1565,7 +1566,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 		if (pm->show_pfn)
 			frame = pte_pfn(pte) +
 				((addr & ~hmask) >> PAGE_SHIFT);
-	} else if (pte_swp_uffd_wp_any(pte)) {
+	} else if (pte_swp_uffd_wp_any(hugetlb_ptep_get(hpte))) {
 		flags |= PM_UFFD_WP;
 	}
 
@@ -1869,17 +1870,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 #ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
-		unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(struct hugetlb_pte *hpte, unsigned long addr,
+		unsigned long end, struct mm_walk *walk)
 {
-	pte_t huge_pte = huge_ptep_get(pte);
+	pte_t huge_pte = hugetlb_ptep_get(hpte);
 	struct numa_maps *md;
 	struct page *page;
 
-	if (!pte_present(huge_pte))
+	if (!hugetlb_pte_present_leaf(hpte))
 		return 0;
 
 	page = pte_page(huge_pte);
+	if (page != compound_head(page))
+		return 0;
 
 	md = walk->private;
 	gather_stats(page, md, pte_dirty(huge_pte), 1);
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index ac7b38ad5903..0d21e25df37f 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -3,6 +3,7 @@
 #define _LINUX_PAGEWALK_H
 
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 
 struct mm_walk;
 
@@ -47,7 +48,7 @@ struct mm_walk_ops {
 			     unsigned long next, struct mm_walk *walk);
 	int (*pte_hole)(unsigned long addr, unsigned long next,
 			int depth, struct mm_walk *walk);
-	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+	int (*hugetlb_entry)(struct hugetlb_pte *hpte,
 			     unsigned long addr, unsigned long next,
 			     struct mm_walk *walk);
 	int (*test_walk)(unsigned long addr, unsigned long next,
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 59e1653799f8..ce50b937dcf2 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -324,14 +324,15 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
+static void damon_hugetlb_mkold(struct hugetlb_pte *hpte, struct mm_struct *mm,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	bool referenced = false;
-	pte_t entry = huge_ptep_get(pte);
+	pte_t entry = hugetlb_ptep_get(hpte);
 	struct page *page = pte_page(entry);
+	struct page *hpage = compound_head(page);
 
-	get_page(page);
+	get_page(hpage);
 
 	if (pte_young(entry)) {
 		referenced = true;
@@ -342,18 +343,18 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
 
 #ifdef CONFIG_MMU_NOTIFIER
 	if (mmu_notifier_clear_young(mm, addr,
-				addr + huge_page_size(hstate_vma(vma))))
+				addr + hugetlb_pte_size(hpte)))
 		referenced = true;
 #endif /* CONFIG_MMU_NOTIFIER */
 
 	if (referenced)
-		set_page_young(page);
+		set_page_young(hpage);
 
-	set_page_idle(page);
-	put_page(page);
+	set_page_idle(hpage);
+	put_page(hpage);
 }
 
-static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int damon_mkold_hugetlb_entry(struct hugetlb_pte *hpte,
 				     unsigned long addr, unsigned long end,
 				     struct mm_walk *walk)
 {
@@ -361,12 +362,12 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(h, walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+	entry = huge_ptep_get(hpte->ptep);
 	if (!pte_present(entry))
 		goto out;
 
-	damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
+	damon_hugetlb_mkold(hpte, walk->mm, walk->vma, addr);
 
 out:
 	spin_unlock(ptl);
@@ -474,31 +475,32 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int damon_young_hugetlb_entry(struct hugetlb_pte *hpte,
 				     unsigned long addr, unsigned long end,
 				     struct mm_walk *walk)
 {
 	struct damon_young_walk_private *priv = walk->private;
 	struct hstate *h = hstate_vma(walk->vma);
-	struct page *page;
+	struct page *page, *hpage;
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(h, walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+	entry = huge_ptep_get(hpte->ptep);
 	if (!pte_present(entry))
 		goto out;
 
 	page = pte_page(entry);
-	get_page(page);
+	hpage = compound_head(page);
+	get_page(hpage);
 
-	if (pte_young(entry) || !page_is_idle(page) ||
+	if (pte_young(entry) || !page_is_idle(hpage) ||
 	    mmu_notifier_test_young(walk->mm, addr)) {
 		*priv->page_sz = huge_page_size(h);
 		priv->young = true;
 	}
 
-	put_page(page);
+	put_page(hpage);
 
 out:
 	spin_unlock(ptl);
diff --git a/mm/hmm.c b/mm/hmm.c
index 3fd3242c5e50..1ad5d76fa8be 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -472,7 +472,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 #endif
 
 #ifdef CONFIG_HUGETLB_PAGE
-static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int hmm_vma_walk_hugetlb_entry(struct hugetlb_pte *hpte,
 				      unsigned long start, unsigned long end,
 				      struct mm_walk *walk)
 {
@@ -483,11 +483,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	unsigned int required_fault;
 	unsigned long pfn_req_flags;
 	unsigned long cpu_flags;
+	unsigned long hmask = hugetlb_pte_mask(hpte);
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	ptl = huge_pte_lock_shift(hpte->shift, walk->mm, hpte->ptep);
+	entry = huge_ptep_get(hpte->ptep);
 
 	i = (start - range->start) >> PAGE_SHIFT;
 	pfn_req_flags = range->hmm_pfns[i];
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d39b01fd52fe..a1d82db7c19f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -559,7 +559,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	return addr != end ? -EIO : 0;
 }
 
-static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+static int queue_pages_hugetlb(struct hugetlb_pte *hpte,
 			       unsigned long addr, unsigned long end,
 			       struct mm_walk *walk)
 {
@@ -571,8 +571,13 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
-	entry = huge_ptep_get(pte);
+	/* We don't migrate high-granularity HugeTLB mappings for now. */
+	if (hugetlb_pte_size(hpte) !=
+	    huge_page_size(hstate_vma(walk->vma)))
+		return -EINVAL;
+
+	ptl = hugetlb_pte_lock(walk->mm, hpte);
+	entry = hugetlb_ptep_get(hpte);
 	if (!pte_present(entry))
 		goto unlock;
 	page = pte_page(entry);
diff --git a/mm/mincore.c b/mm/mincore.c
index fa200c14185f..dc1717dc6a2c 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -22,7 +22,7 @@
 #include <linux/uaccess.h>
 #include "swap.h"
 
-static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
+static int mincore_hugetlb(struct hugetlb_pte *hpte, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
@@ -33,7 +33,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 	 * Hugepages under user process are always in RAM and never
 	 * swapped out, but theoretically it needs to be checked.
	 */
-	present = pte && !huge_pte_none(huge_ptep_get(pte));
+	present = hpte->ptep && !hugetlb_pte_none(hpte);
 	for (; addr != end; vec++, addr += PAGE_SIZE)
 		*vec = present;
 	walk->private = vec;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..9c5a35a1c0eb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -476,12 +476,12 @@ static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
 		0 : -EACCES;
 }
 
-static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+static int prot_none_hugetlb_entry(struct hugetlb_pte *hpte,
 				   unsigned long addr, unsigned long next,
 				   struct mm_walk *walk)
 {
-	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
-		0 : -EACCES;
+	return pfn_modify_allowed(pte_pfn(*hpte->ptep),
+			*(pgprot_t *)(walk->private)) ? 0 : -EACCES;
 }
 
 static int prot_none_test(unsigned long addr, unsigned long next,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9b3db11a4d1d..f8e24a0a0179 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,6 +3,7 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
+#include <linux/minmax.h>
 
 /*
  * We want to know the real level where a entry is located ignoring any
@@ -301,13 +302,26 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 	pte_t *pte;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	struct hugetlb_pte hpte;
 
 	do {
-		next = hugetlb_entry_end(h, addr, end);
 		pte = huge_pte_offset(walk->mm, addr & hmask, sz);
+		if (!pte) {
+			next = hugetlb_entry_end(h, addr, end);
+		} else {
+			hugetlb_pte_populate(&hpte, pte, huge_page_shift(h));
+			if (hugetlb_hgm_enabled(vma)) {
+				err = hugetlb_walk_to(walk->mm, &hpte, addr,
+						      PAGE_SIZE,
+						      /*stop_at_none=*/true);
+				if (err)
+					break;
+			}
+			next = min(addr + hugetlb_pte_size(&hpte), end);
+		}
 
 		if (pte)
-			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+			err = ops->hugetlb_entry(&hpte, addr, next, walk);
 		else if (ops->pte_hole)
 			err = ops->pte_hole(addr, next, -1, walk);
 
-- 
2.37.0.rc0.161.g10f37bed90-goog
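For illustration, this is what a walk_hugetlb_range user looks like
against the new callback signature. It is a hypothetical out-of-tree
example (count_priv, count_hugetlb_entry, and count_ops are made-up
names), written against the helpers sketched at the top of this mail:

	#include <linux/pagewalk.h>
	#include <linux/hugetlb.h>

	/* Accumulates how much of a range is mapped by present hugetlb leaves. */
	struct count_priv {
		unsigned long leaves;
		unsigned long bytes;
	};

	static int count_hugetlb_entry(struct hugetlb_pte *hpte, unsigned long addr,
				       unsigned long next, struct mm_walk *walk)
	{
		struct count_priv *priv = walk->private;

		/*
		 * The mapping size now comes from the hugetlb_pte, not from an
		 * hmask argument. (A real user would also take the page table
		 * lock here, as the converted callbacks in this patch do.)
		 */
		if (hugetlb_pte_present_leaf(hpte)) {
			priv->leaves++;
			priv->bytes += hugetlb_pte_size(hpte);
		}
		return 0;
	}

	static const struct mm_walk_ops count_ops = {
		.hugetlb_entry = count_hugetlb_entry,
	};

With HGM enabled on a VMA, a single walk_page_range(mm, start, end,
&count_ops, &priv) call may invoke the callback at 4K granularity for
some ranges and at the full hstate size for others; the accounting
stays correct either way because the size is always taken from the
hugetlb_pte.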