On 1/28/21 2:48 PM, Axel Rasmussen wrote:
> From: Peter Xu <peterx@xxxxxxxxxx>
>
> Huge pmd sharing could bring problems to userfaultfd.  The thing is that
> userfaultfd is running its logic based on the special bits on page table
> entries, however the huge pmd sharing could potentially share page table
> entries for different address ranges.  That could cause issues on either:
>
> - When sharing huge pmd page tables for an uffd write protected range, the
>   newly mapped huge pmd range will also be write protected unexpectedly, or,
>
> - When we try to write protect a range of huge pmd shared range, we'll first
>   do huge_pmd_unshare() in hugetlb_change_protection(), however that also
>   means the UFFDIO_WRITEPROTECT could be silently skipped for the shared
>   region, which could lead to data loss.
>
> While at it, a few other things are done altogether:
>
> - Move want_pmd_share() from mm/hugetlb.c into linux/hugetlb.h, because
>   that's definitely something that arch code would like to use too
>
> - ARM64 currently directly checks against CONFIG_ARCH_WANT_HUGE_PMD_SHARE when
>   trying to share huge pmd.  Switch to the want_pmd_share() helper.
>
> Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
> Signed-off-by: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
> ---
>  arch/arm64/mm/hugetlbpage.c   |  3 +--
>  include/linux/hugetlb.h       | 15 +++++++++++++++
>  include/linux/userfaultfd_k.h |  9 +++++++++
>  mm/hugetlb.c                  |  5 ++---
>  4 files changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> index 5b32ec888698..1a8ce0facfe8 100644
> --- a/arch/arm64/mm/hugetlbpage.c
> +++ b/arch/arm64/mm/hugetlbpage.c
> @@ -284,8 +284,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
>  		 */
>  		ptep = pte_alloc_map(mm, pmdp, addr);
>  	} else if (sz == PMD_SIZE) {
> -		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
> -		    pud_none(READ_ONCE(*pudp)))
> +		if (want_pmd_share(vma) && pud_none(READ_ONCE(*pudp)))
>  			ptep = huge_pmd_share(mm, addr, pudp);
>  		else
>  			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 1e0abb609976..4508136c8376 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -11,6 +11,7 @@
>  #include <linux/kref.h>
>  #include <linux/pgtable.h>
>  #include <linux/gfp.h>
> +#include <linux/userfaultfd_k.h>
>
>  struct ctl_table;
>  struct user_struct;
> @@ -947,4 +948,18 @@ static inline __init void hugetlb_cma_check(void)
>  }
>  #endif
>
> +static inline bool want_pmd_share(struct vm_area_struct *vma)
> +{
> +#ifdef CONFIG_USERFAULTFD
> +	if (uffd_disable_huge_pmd_share(vma))
> +		return false;

We are testing for uffd conditions that prevent sharing to determine if
huge_pmd_share should be called.  Since we have the vma, perhaps we should
do the vma_shareable() test here as well?

Or, perhaps delay all checks until we are in huge_pmd_share and add
uffd_disable_huge_pmd_share to vma_shareable?
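For illustration only, the first option might look roughly like the
following untested sketch.  It assumes the helper gains an addr argument
and lives where vma_shareable() from mm/hugetlb.c is visible;
uffd_disable_huge_pmd_share() is the helper added by this patch:

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_USERFAULTFD
	/* uffd-wp tracks protection per pgtable entry, so never share */
	if (uffd_disable_huge_pmd_share(vma))
		return false;
#endif
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
	/* same vm_flags/alignment test huge_pmd_share() relies on today */
	return vma_shareable(vma, addr);
#else
	return false;
#endif
}

Callers such as huge_pte_alloc() would then pass addr along with the vma,
i.e. "if (want_pmd_share(vma, addr) && pud_none(*pud))".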
-- 
Mike Kravetz

> +#endif
> +
> +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
> +	return true;
> +#else
> +	return false;
> +#endif
> +}
> +
>  #endif /* _LINUX_HUGETLB_H */
> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> index a8e5f3ea9bb2..c63ccdae3eab 100644
> --- a/include/linux/userfaultfd_k.h
> +++ b/include/linux/userfaultfd_k.h
> @@ -52,6 +52,15 @@ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
>  	return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
>  }
>
> +/*
> + * Never enable huge pmd sharing on uffd-wp registered vmas, because uffd-wp
> + * protect information is per pgtable entry.
> + */
> +static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
> +{
> +	return vma->vm_flags & VM_UFFD_WP;
> +}
> +
>  static inline bool userfaultfd_missing(struct vm_area_struct *vma)
>  {
>  	return vma->vm_flags & VM_UFFD_MISSING;
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 07b23c81b1db..d46f50a99ff1 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5371,7 +5371,7 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
>  	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
>  	return 1;
>  }
> -#define want_pmd_share()	(1)
> +
>  #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
>  pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
>  {
> @@ -5388,7 +5388,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
>  				unsigned long *start, unsigned long *end)
>  {
>  }
> -#define want_pmd_share()	(0)
>  #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
>
>  #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
> @@ -5410,7 +5409,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
>  		pte = (pte_t *)pud;
>  	} else {
>  		BUG_ON(sz != PMD_SIZE);
> -		if (want_pmd_share() && pud_none(*pud))
> +		if (want_pmd_share(vma) && pud_none(*pud))
>  			pte = huge_pmd_share(mm, addr, pud);
>  		else
>  			pte = (pte_t *)pmd_alloc(mm, pud, addr);
>
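For comparison, the other option floated above (delaying all the checks
until huge_pmd_share() is reached) might look roughly like the untested
sketch below, based on the current vma_shareable() in mm/hugetlb.c; only
the uffd check is new:

static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long base = addr & PUD_MASK;
	unsigned long end = base + PUD_SIZE;

	/* uffd-wp protection lives in individual ptes, so do not share */
	if (uffd_disable_huge_pmd_share(vma))
		return false;

	/*
	 * check on proper vm_flags and page table alignment
	 */
	if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
		return true;
	return false;
}

With that shape, huge_pmd_share() keeps its existing early vma_shareable()
test and simply falls back to pmd_alloc() for uffd-wp vmas, and
want_pmd_share() could stay a pure CONFIG_ARCH_WANT_HUGE_PMD_SHARE check.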