The patch titled Subject: mm: change huge_ptep_clear_flush() to return the original pte has been added to the -mm mm-unstable branch. Its filename is mm-change-huge_ptep_clear_flush-to-return-the-original-pte.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-change-huge_ptep_clear_flush-to-return-the-original-pte.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx> Subject: mm: change huge_ptep_clear_flush() to return the original pte Patch series "Fix CONT-PTE/PMD size hugetlb issue when unmapping or migrating", v3. presently, migrating a hugetlb page or unmapping a poisoned hugetlb page, we'll use ptep_clear_flush() and set_pte_at() to nuke the page table entry and remap it, and this is incorrect for CONT-PTE or CONT-PMD size hugetlb page, which will cause potential data consistent issue. This patch set will change to use hugetlb related APIs to fix this issue. Note: Mike pointed out the huge_ptep_get() will only return the one specific value, and it would not take into account the dirty or young bits of CONT-PTE/PMDs like the huge_ptep_get_and_clear() [1]. This inconsistent issue is not introduced by this patch set, and this issue will be addressed in another thread [2]. Meanwhile the uffd for hugetlb case [3] pointed out by Gerald also needs another patch to address. [1] https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@xxxxxxxxxx/ [2] https://lore.kernel.org/all/cover.1651998586.git.baolin.wang@xxxxxxxxxxxxxxxxx/ [3] https://lore.kernel.org/linux-mm/20220503120343.6264e126@thinkpad/ It is incorrect to use ptep_clear_flush() to nuke a hugetlb page table when unmapping or migrating a hugetlb page, and will change to use huge_ptep_clear_flush() instead in the following patches. So this is a preparation patch, which changes the huge_ptep_clear_flush() to return the original pte to help to nuke a hugetlb page table. Link: https://lkml.kernel.org/r/cover.1652147571.git.baolin.wang@xxxxxxxxxxxxxxxxx Link: https://lkml.kernel.org/r/dcf065868cce35bceaf138613ad27f17bb7c0c19.1652147571.git.baolin.wang@xxxxxxxxxxxxxxxxx Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx> Acked-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Reviewed-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> Cc: James Bottomley <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx> Cc: Helge Deller <deller@xxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: Paul Mackerras <paulus@xxxxxxxxx> Cc: Heiko Carstens <hca@xxxxxxxxxxxxx> Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx> Cc: Alexander Gordeev <agordeev@xxxxxxxxxxxxx> Cc: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx> Cc: Sven Schnelle <svens@xxxxxxxxxxxxx> Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxx> Cc: Rich Felker <dalias@xxxxxxxx> Cc: David S. Miller <davem@xxxxxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Cc: Gerald Schaefer <gerald.schaefer@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- arch/arm64/include/asm/hugetlb.h | 4 ++-- arch/arm64/mm/hugetlbpage.c | 12 +++++------- arch/ia64/include/asm/hugetlb.h | 4 ++-- arch/mips/include/asm/hugetlb.h | 9 ++++++--- arch/parisc/include/asm/hugetlb.h | 4 ++-- arch/powerpc/include/asm/hugetlb.h | 9 ++++++--- arch/s390/include/asm/hugetlb.h | 6 +++--- arch/sh/include/asm/hugetlb.h | 4 ++-- arch/sparc/include/asm/hugetlb.h | 4 ++-- include/asm-generic/hugetlb.h | 4 ++-- 10 files changed, 32 insertions(+), 28 deletions(-) --- a/arch/arm64/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/arm64/include/asm/hugetlb.h @@ -39,8 +39,8 @@ extern pte_t huge_ptep_get_and_clear(str extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -extern void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep); +extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_HUGE_PTE_CLEAR extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); --- a/arch/arm64/mm/hugetlbpage.c~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/arm64/mm/hugetlbpage.c @@ -486,19 +486,17 @@ void huge_ptep_set_wrprotect(struct mm_s set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); } -void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { size_t pgsize; int ncontig; - if (!pte_cont(READ_ONCE(*ptep))) { - ptep_clear_flush(vma, addr, ptep); - return; - } + if (!pte_cont(READ_ONCE(*ptep))) + return ptep_clear_flush(vma, addr, ptep); ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); - clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); + return get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); } static int __init hugetlbpage_init(void) --- a/arch/ia64/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/ia64/include/asm/hugetlb.h @@ -23,8 +23,8 @@ static inline int is_hugepage_only_range #define is_hugepage_only_range is_hugepage_only_range #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { } --- a/arch/mips/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/mips/include/asm/hugetlb.h @@ -43,16 +43,19 @@ static inline pte_t huge_ptep_get_and_cl } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { + pte_t pte; + /* * clear the huge pte entry firstly, so that the other smp threads will * not get old pte entry after finishing flush_tlb_page and before * setting new huge pte entry */ - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); flush_tlb_page(vma, addr); + return pte; } #define __HAVE_ARCH_HUGE_PTE_NONE --- a/arch/parisc/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/parisc/include/asm/hugetlb.h @@ -28,8 +28,8 @@ static inline int prepare_hugepage_range } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { } --- a/arch/powerpc/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/powerpc/include/asm/hugetlb.h @@ -43,11 +43,14 @@ static inline pte_t huge_ptep_get_and_cl } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte_t pte; + + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); flush_hugetlb_page(vma, addr); + return pte; } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS --- a/arch/s390/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/s390/include/asm/hugetlb.h @@ -50,10 +50,10 @@ static inline void huge_pte_clear(struct set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY)); } -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { - huge_ptep_get_and_clear(vma->vm_mm, address, ptep); + return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, --- a/arch/sh/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/sh/include/asm/hugetlb.h @@ -21,8 +21,8 @@ static inline int prepare_hugepage_range } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { } --- a/arch/sparc/include/asm/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/arch/sparc/include/asm/hugetlb.h @@ -21,8 +21,8 @@ pte_t huge_ptep_get_and_clear(struct mm_ pte_t *ptep); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { } --- a/include/asm-generic/hugetlb.h~mm-change-huge_ptep_clear_flush-to-return-the-original-pte +++ a/include/asm-generic/hugetlb.h @@ -84,10 +84,10 @@ static inline pte_t huge_ptep_get_and_cl #endif #ifndef __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - ptep_clear_flush(vma, addr, ptep); + return ptep_clear_flush(vma, addr, ptep); } #endif _ Patches currently in -mm which might be from baolin.wang@xxxxxxxxxxxxxxxxx are mm-change-huge_ptep_clear_flush-to-return-the-original-pte.patch mm-rmap-fix-cont-pte-pmd-size-hugetlb-issue-when-migration.patch mm-rmap-fix-cont-pte-pmd-size-hugetlb-issue-when-unmapping.patch