On 10/21/22 16:36, James Houghton wrote:
> hugetlb_vma_maps_page was mostly used as an optimization: if the VMA
> isn't mapping a page, then we don't have to attempt to unmap it again.
> We are still able to call the unmap routine if we need to.
>
> For high-granularity mapped pages, we can't easily do a full walk to see
> if the page is actually mapped or not, so simply return that it might
> be.
>
> Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
> ---
>  fs/hugetlbfs/inode.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 7f836f8f9db1..a7ab62e39b8c 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -383,21 +383,34 @@ static void hugetlb_delete_from_page_cache(struct folio *folio)
>   * mutex for the page in the mapping. So, we can not race with page being
>   * faulted into the vma.
>   */
> -static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
> -				unsigned long addr, struct page *page)
> +static bool hugetlb_vma_maybe_maps_page(struct vm_area_struct *vma,
> +				unsigned long addr, struct page *page)
>  {
>  	pte_t *ptep, pte;
> +	struct hugetlb_pte hpte;
> +	struct hstate *h = hstate_vma(vma);
>
> -	ptep = huge_pte_offset(vma->vm_mm, addr,
> -			huge_page_size(hstate_vma(vma)));
> +	ptep = huge_pte_offset(vma->vm_mm, addr, huge_page_size(h));
>
>  	if (!ptep)
>  		return false;
>
> +	hugetlb_pte_populate(&hpte, ptep, huge_page_shift(h),
> +			hpage_size_to_level(huge_page_size(h)));
> +

Only a nit: the hugetlb_pte_populate call should probably be after the
check below (rough sketch of what I mean at the bottom of this mail).

This makes sense, and as you mention, hugetlb_vma_maybe_maps_page is
mostly an optimization that will still work as designed for non-HGM
vmas.
-- 
Mike Kravetz

>  	pte = huge_ptep_get(ptep);
>  	if (huge_pte_none(pte) || !pte_present(pte))
>  		return false;
>
> +	if (!hugetlb_pte_present_leaf(&hpte, pte))
> +		/*
> +		 * The top-level PTE is not a leaf, so it's possible that a PTE
> +		 * under us is mapping the page. We aren't holding the VMA
> +		 * lock, so it is unsafe to continue the walk further. Instead,
> +		 * return true to indicate that we might be mapping the page.
> +		 */
> +		return true;
> +
>  	if (pte_page(pte) == page)
>  		return true;
>
> @@ -457,7 +470,8 @@ static void hugetlb_unmap_file_folio(struct hstate *h,
>  		v_start = vma_offset_start(vma, start);
>  		v_end = vma_offset_end(vma, end);
>
> -		if (!hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
> +		if (!hugetlb_vma_maybe_maps_page(vma, vma->vm_start + v_start,
> +					page))
>  			continue;
>
>  		if (!hugetlb_vma_trylock_write(vma)) {
> @@ -507,7 +521,8 @@ static void hugetlb_unmap_file_folio(struct hstate *h,
>  		 */
>  		v_start = vma_offset_start(vma, start);
>  		v_end = vma_offset_end(vma, end);
> -		if (hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
> +		if (hugetlb_vma_maybe_maps_page(vma, vma->vm_start + v_start,
> +					page))
>  			unmap_hugepage_range(vma, vma->vm_start + v_start,
>  					     v_end, NULL,
>  					     ZAP_FLAG_DROP_MARKER);
> -- 
> 2.38.0.135.g90850a2211-goog
>
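
For concreteness, here is a rough, untested sketch of the ordering the nit
above is asking for. It reuses the helpers introduced by this series
(hugetlb_pte_populate, hpage_size_to_level, hugetlb_pte_present_leaf), so
treat it as illustrative of the reordering only, not a tested replacement:

static bool hugetlb_vma_maybe_maps_page(struct vm_area_struct *vma,
					unsigned long addr, struct page *page)
{
	pte_t *ptep, pte;
	struct hugetlb_pte hpte;
	struct hstate *h = hstate_vma(vma);

	ptep = huge_pte_offset(vma->vm_mm, addr, huge_page_size(h));
	if (!ptep)
		return false;

	pte = huge_ptep_get(ptep);
	if (huge_pte_none(pte) || !pte_present(pte))
		return false;

	/* Only populate the hugetlb_pte once we know the PTE is worth examining. */
	hugetlb_pte_populate(&hpte, ptep, huge_page_shift(h),
			hpage_size_to_level(huge_page_size(h)));

	if (!hugetlb_pte_present_leaf(&hpte, pte))
		/*
		 * Not a leaf: a lower-level PTE may map the page, and we
		 * cannot continue the walk without the VMA lock, so report
		 * that the page might be mapped.
		 */
		return true;

	return pte_page(pte) == page;
}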