> On Sep 24, 2024, at 14:11, Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx> wrote: > In collapse_pte_mapped_thp(), we may modify the pte and pmd entry after > acquring the ptl, so convert it to using pte_offset_map_rw_nolock(). At > this time, the pte_same() check is not performed after the PTL held. So we > should get pgt_pmd and do pmd_same() check after the ptl held. > > Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx> > --- > mm/khugepaged.c | 14 +++++++++++--- > 1 file changed, 11 insertions(+), 3 deletions(-) > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 6498721d4783a..8ab79c13d077f 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -1605,7 +1605,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, > if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED)) > pml = pmd_lock(mm, pmd); > > - start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl); > + start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl); > if (!start_pte) /* mmap_lock + page lock should prevent this */ > goto abort; > if (!pml) > @@ -1613,6 +1613,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, > else if (ptl != pml) > spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); > > + if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) > + goto abort; > + > /* step 2: clear page table and adjust rmap */ > for (i = 0, addr = haddr, pte = start_pte; > i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) { > @@ -1645,7 +1648,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, > nr_ptes++; > } > > - pte_unmap(start_pte); > if (!pml) > spin_unlock(ptl); > > @@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, > /* step 4: remove empty page table */ > if (!pml) { > pml = pmd_lock(mm, pmd); > - if (ptl != pml) > + if (ptl != pml) { > spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); > + if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) { > + spin_unlock(pml); > + goto abort; Drop the reference of folio and the mm counter twice at the label of abort and the step 3. > + } > + } > } > pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd); > pmdp_get_lockless_sync(); > if (ptl != pml) > spin_unlock(ptl); > + pte_unmap(start_pte); > spin_unlock(pml); Why not? pte_unmap_unlock(start_pte, ptl); if (pml != ptl) spin_unlock(pml); > > mmu_notifier_invalidate_range_end(&range); > -- > 2.20.1