On 02/18/23 00:27, James Houghton wrote: > Fix how UFFDIO_CONTINUE and UFFDIO_WRITEPROTECT interact in these two > ways: > - UFFDIO_WRITEPROTECT no longer prevents a high-granularity > UFFDIO_CONTINUE. > - UFFD-WP PTE markers installed with UFFDIO_WRITEPROTECT will be > properly propagated when high-granularity UFFDIO_CONTINUEs are > performed. > > Note: UFFDIO_WRITEPROTECT is not yet permitted at PAGE_SIZE granularity. > > Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx> > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 810c05feb41f..f74183acc521 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c Seems relatively straightforward, Acked-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> -- Mike Kravetz > @@ -506,6 +506,30 @@ static bool has_same_uncharge_info(struct file_region *rg, > #endif > } > > +static void hugetlb_install_markers_pmd(pmd_t *pmdp, pte_marker marker) > +{ > + int i; > + > + for (i = 0; i < PTRS_PER_PMD; ++i) > + /* > + * WRITE_ONCE not needed because the pud hasn't been > + * installed yet. > + */ > + pmdp[i] = __pmd(pte_val(make_pte_marker(marker))); > +} > + > +static void hugetlb_install_markers_pte(pte_t *ptep, pte_marker marker) > +{ > + int i; > + > + for (i = 0; i < PTRS_PER_PTE; ++i) > + /* > + * WRITE_ONCE not needed because the pmd hasn't been > + * installed yet. > + */ > + ptep[i] = make_pte_marker(marker); > +} > + > /* > * hugetlb_alloc_pmd -- Allocate or find a PMD beneath a PUD-level hpte. > * > @@ -528,23 +552,32 @@ pmd_t *hugetlb_alloc_pmd(struct mm_struct *mm, struct hugetlb_pte *hpte, > pmd_t *new; > pud_t *pudp; > pud_t pud; > + bool is_marker; > + pte_marker marker; > > if (hpte->level != HUGETLB_LEVEL_PUD) > return ERR_PTR(-EINVAL); > > pudp = (pud_t *)hpte->ptep; > retry: > + is_marker = false; > pud = READ_ONCE(*pudp); > if (likely(pud_present(pud))) > return unlikely(pud_leaf(pud)) > ? 
ERR_PTR(-EEXIST) > : pmd_offset(pudp, addr); > - else if (!pud_none(pud)) > + else if (!pud_none(pud)) { > /* > - * Not present and not none means that a swap entry lives here, > - * and we can't get rid of it. > + * Not present and not none means that a swap entry lives here. > + * If it's a PTE marker, we can deal with it. If it's another > + * swap entry, we don't attempt to split it. > */ > - return ERR_PTR(-EEXIST); > + is_marker = is_pte_marker(__pte(pud_val(pud))); > + if (!is_marker) > + return ERR_PTR(-EEXIST); > + > + marker = pte_marker_get(pte_to_swp_entry(__pte(pud_val(pud)))); > + } > > new = pmd_alloc_one(mm, addr); > if (!new) > @@ -557,6 +590,13 @@ pmd_t *hugetlb_alloc_pmd(struct mm_struct *mm, struct hugetlb_pte *hpte, > goto retry; > } > > + /* > + * Install markers before PUD to avoid races with other > + * page table walks. > + */ > + if (is_marker) > + hugetlb_install_markers_pmd(new, marker); > + > mm_inc_nr_pmds(mm); > smp_wmb(); /* See comment in pmd_install() */ > pud_populate(mm, pudp, new); > @@ -576,23 +616,32 @@ pte_t *hugetlb_alloc_pte(struct mm_struct *mm, struct hugetlb_pte *hpte, > pgtable_t new; > pmd_t *pmdp; > pmd_t pmd; > + bool is_marker; > + pte_marker marker; > > if (hpte->level != HUGETLB_LEVEL_PMD) > return ERR_PTR(-EINVAL); > > pmdp = (pmd_t *)hpte->ptep; > retry: > + is_marker = false; > pmd = READ_ONCE(*pmdp); > if (likely(pmd_present(pmd))) > return unlikely(pmd_leaf(pmd)) > ? ERR_PTR(-EEXIST) > : pte_offset_kernel(pmdp, addr); > - else if (!pmd_none(pmd)) > + else if (!pmd_none(pmd)) { > /* > - * Not present and not none means that a swap entry lives here, > - * and we can't get rid of it. > + * Not present and not none means that a swap entry lives here. > + * If it's a PTE marker, we can deal with it. If it's another > + * swap entry, we don't attempt to split it. 
> */ > - return ERR_PTR(-EEXIST); > + is_marker = is_pte_marker(__pte(pmd_val(pmd))); > + if (!is_marker) > + return ERR_PTR(-EEXIST); > + > + marker = pte_marker_get(pte_to_swp_entry(__pte(pmd_val(pmd)))); > + } > > /* > * With CONFIG_HIGHPTE, calling `pte_alloc_one` directly may result > @@ -613,6 +662,9 @@ pte_t *hugetlb_alloc_pte(struct mm_struct *mm, struct hugetlb_pte *hpte, > goto retry; > } > > + if (is_marker) > + hugetlb_install_markers_pte(page_address(new), marker); > + > mm_inc_nr_ptes(mm); > smp_wmb(); /* See comment in pmd_install() */ > pmd_populate(mm, pmdp, new); > @@ -7384,7 +7436,12 @@ static int __hugetlb_hgm_walk(struct mm_struct *mm, struct vm_area_struct *vma, > if (!pte_present(pte)) { > if (!alloc) > return 0; > - if (unlikely(!huge_pte_none(pte))) > + /* > + * In hugetlb_alloc_pmd and hugetlb_alloc_pte, > + * we split PTE markers, so we can tolerate > + * PTE markers here. > + */ > + if (unlikely(!huge_pte_none_mostly(pte))) > return -EEXIST; > } else if (hugetlb_pte_present_leaf(hpte, pte)) > return 0; > -- > 2.39.2.637.g21b0678d19-goog >