Handle accesses to huge pages mapped with PAGE_METADATA_NONE in a similar
way to how accesses to PTEs are handled.

Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx>
---
 include/asm-generic/memory_metadata.h |   2 +
 include/linux/huge_mm.h               |   6 ++
 mm/huge_memory.c                      | 122 ++++++++++++++++++++++++++
 mm/memory.c                           |   7 +-
 4 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/include/asm-generic/memory_metadata.h b/include/asm-generic/memory_metadata.h
index 4176fd89ef41..dfdf2dd82ea6 100644
--- a/include/asm-generic/memory_metadata.h
+++ b/include/asm-generic/memory_metadata.h
@@ -7,6 +7,8 @@
 
 extern unsigned long totalmetadata_pages;
 
+void migrate_metadata_none_page(struct page *page, struct vm_area_struct *vma);
+
 #ifndef CONFIG_MEMORY_METADATA
 static inline bool metadata_storage_enabled(void)
 {
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 20284387b841..6920571b5b6d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -229,6 +229,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		pud_t *pud, int flags, struct dev_pagemap **pgmap);
 
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
+vm_fault_t do_huge_pmd_metadata_none_page(struct vm_fault *vmf);
 
 extern struct page *huge_zero_page;
 extern unsigned long huge_zero_pfn;
@@ -356,6 +357,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	return 0;
 }
 
+static inline vm_fault_t do_huge_pmd_metadata_none_page(struct vm_fault *vmf)
+{
+	return 0;
+}
+
 static inline bool is_huge_zero_page(struct page *page)
 {
 	return false;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cf5247b012de..06038424c3a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -26,6 +26,7 @@
 #include <linux/mman.h>
 #include <linux/memremap.h>
 #include <linux/pagemap.h>
+#include <linux/page-isolation.h>
 #include <linux/debugfs.h>
 #include <linux/migrate.h>
 #include <linux/hashtable.h>
@@ -38,6 +39,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/memory-tiers.h>
 
+#include <asm/memory_metadata.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -1490,6 +1492,126 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	return page;
 }
 
+/*
+ * Handle a fault on a huge PMD mapped with PAGE_METADATA_NONE.
+ *
+ * Reached from __handle_mm_fault() when metadata storage is enabled and the
+ * faulting PMD is pmd_metadata_none(). Metadata storage is reserved for the
+ * huge page; a page that is_migrate_metadata_page(), or whose reservation
+ * fails, is handed to migrate_metadata_none_page() instead. The PMD gets the
+ * VMA's protection back only when no migration was attempted.
+ *
+ * Return: 0 when the access can simply be replayed, VM_FAULT_RETRY when the
+ * fault must be retried, or VM_FAULT_OOM when reserving storage fails with
+ * page_metadata_in_swap() set on an already retried fault.
+ */
+vm_fault_t do_huge_pmd_metadata_none_page(struct vm_fault *vmf)
+{
+	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = vmf->vma;
+	pmd_t old_pmd = vmf->orig_pmd;
+	struct page *page = NULL;
+	bool do_migrate = false;
+	bool writable = false;
+	vm_fault_t err;
+	pmd_t new_pmd;
+	int ret;
+
+	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
+	spin_lock(vmf->ptl);
+	if (unlikely(!pmd_same(*vmf->pmd, old_pmd))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+	new_pmd = pmd_modify(old_pmd, vma->vm_page_prot);
+
+	/*
+	 * Detect now whether the PMD could be writable; this information
+	 * is only valid while holding the PT lock.
+	 */
+	writable = pmd_write(new_pmd);
+	if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
+	    can_change_pmd_writable(vma, vmf->address, new_pmd))
+		writable = true;
+
+	page = vm_normal_page_pmd(vma, vmf->address, new_pmd);
+	if (!page)
+		goto out_map;
+
+	/*
+	 * This should never happen, once a VMA has been marked as tagged, that
+	 * cannot be changed.
+	 */
+	if (!(vma->vm_flags & VM_MTE))
+		goto out_map;
+
+	/* Prevent the page from being unmapped from under us. */
+	get_page(page);
+	vma_set_access_pid_bit(vma);
+
+	spin_unlock(vmf->ptl);
+	writable = false;
+
+	if (unlikely(is_migrate_isolate_page(page))) {
+		if (!(vmf->flags & FAULT_FLAG_TRIED))
+			err = VM_FAULT_RETRY;
+		else
+			err = 0;
+		put_page(page);
+		/* The reference is gone, do not fall through to the remap. */
+		return err;
+	} else if (is_migrate_metadata_page(page)) {
+		do_migrate = true;
+	} else {
+		ret = reserve_metadata_storage(page, HPAGE_PMD_ORDER, GFP_HIGHUSER_MOVABLE);
+		if (ret == -EINTR) {
+			put_page(page);
+			return VM_FAULT_RETRY;
+		} else if (ret) {
+			if (unlikely(page_metadata_in_swap(page))) {
+				if (vmf->flags & FAULT_FLAG_TRIED)
+					err = VM_FAULT_OOM;
+				else
+					err = VM_FAULT_RETRY;
+
+				put_page(page);
+				return err;
+			}
+			do_migrate = true;
+		}
+	}
+
+	if (do_migrate) {
+		migrate_metadata_none_page(page, vma);
+		/*
+		 * Either the page was migrated, in which case there's nothing
+		 * we need to do; or migration failed, in which case all we
+		 * can do is try again. So don't change the PMD.
+		 */
+		return 0;
+	}
+
+	put_page(page);
+
+	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(*vmf->pmd, old_pmd))) {
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+out_map:
+	new_pmd = pmd_modify(old_pmd, vma->vm_page_prot);
+	new_pmd = pmd_mkyoung(new_pmd);
+	if (writable)
+		new_pmd = pmd_mkwrite(new_pmd);
+	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, new_pmd);
+	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+	spin_unlock(vmf->ptl);
+
+	return 0;
+}
+
 /* NUMA hinting page fault entry point for trans huge pmds */
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 {
diff --git a/mm/memory.c b/mm/memory.c
index ade71f38b2ff..6d78d33ef91f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4695,7 +4695,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 }
 
 /* Returns with the page reference dropped. */
-static void migrate_metadata_none_page(struct page *page, struct vm_area_struct *vma)
+void migrate_metadata_none_page(struct page *page, struct vm_area_struct *vma)
 {
 	struct migration_target_control mtc = {
 		.nid = NUMA_NO_NODE,
@@ -5234,8 +5234,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 			return 0;
 		}
 		if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
-			if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
+			if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) {
+				if (metadata_storage_enabled() && pmd_metadata_none(vmf.orig_pmd))
+					return do_huge_pmd_metadata_none_page(&vmf);
 				return do_huge_pmd_numa_page(&vmf);
+			}
 
 			if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
 			    !pmd_write(vmf.orig_pmd)) {
-- 
2.41.0