Hi Alistair,

>
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 64a3239b6407..1f2f0209101a 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -6096,8 +6096,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> >  		 * hugetlb_no_page will drop vma lock and hugetlb fault
> >  		 * mutex internally, which make us return immediately.
> >  		 */
> > -		return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
> > +		ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
> >  				      entry, flags);
> > +		if (!ret)
> > +			mmu_notifier_update_mapping(vma->vm_mm, address,
> > +						    pte_pfn(*ptep));
>
> The next patch ends up calling pfn_to_page() on the result of
> pte_pfn(*ptep). I don't think that's safe because couldn't the PTE have
> already changed and/or the new page have been freed?

Yeah, that might be possible; I believe the right thing to do would be:

-		return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
+		ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
 				      entry, flags);
+		if (!ret) {
+			ptl = huge_pte_lock(h, mm, ptep);
+			mmu_notifier_update_mapping(vma->vm_mm, address,
+						    pte_pfn(*ptep));
+			spin_unlock(ptl);
+		}

In which case I'd need to make a similar change in the shmem path as
well. And also redo (or eliminate) the locking in udmabuf (patch), which
looks a bit excessive on second look given our use-case (reads and
writes do not happen simultaneously due to fence synchronization in the
guest driver). A rough sketch of how the notifier hookup could look is
appended below the quoted patch.

Thanks,
Vivek

>
> > +		return ret;
> >
> >  	ret = 0;
> >
> > @@ -6223,6 +6227,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> >  	 */
> >  	if (need_wait_lock)
> >  		folio_wait_locked(folio);
> > +	if (!ret)
> > +		mmu_notifier_update_mapping(vma->vm_mm, address,
> > +					    pte_pfn(*ptep));
> >  	return ret;
> >  }
> >
> > diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
> > index 50c0dde1354f..6421405334b9 100644
> > --- a/mm/mmu_notifier.c
> > +++ b/mm/mmu_notifier.c
> > @@ -441,6 +441,23 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
> >  	srcu_read_unlock(&srcu, id);
> >  }
> >
> > +void __mmu_notifier_update_mapping(struct mm_struct *mm, unsigned long address,
> > +				   unsigned long pfn)
> > +{
> > +	struct mmu_notifier *subscription;
> > +	int id;
> > +
> > +	id = srcu_read_lock(&srcu);
> > +	hlist_for_each_entry_rcu(subscription,
> > +				 &mm->notifier_subscriptions->list, hlist,
> > +				 srcu_read_lock_held(&srcu)) {
> > +		if (subscription->ops->update_mapping)
> > +			subscription->ops->update_mapping(subscription, mm,
> > +							  address, pfn);
> > +	}
> > +	srcu_read_unlock(&srcu, id);
> > +}
> > +
> >  static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
> >  			       const struct mmu_notifier_range *range)
> >  {
> > diff --git a/mm/shmem.c b/mm/shmem.c
> > index 2f2e0e618072..e59eb5fafadb 100644
> > --- a/mm/shmem.c
> > +++ b/mm/shmem.c
> > @@ -77,6 +77,7 @@ static struct vfsmount *shm_mnt;
> >  #include <linux/fcntl.h>
> >  #include <uapi/linux/memfd.h>
> >  #include <linux/rmap.h>
> > +#include <linux/mmu_notifier.h>
> >  #include <linux/uuid.h>
> >
> >  #include <linux/uaccess.h>
> > @@ -2164,8 +2165,12 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
> >  					     gfp, vma, vmf, &ret);
> >  	if (err)
> >  		return vmf_error(err);
> > -	if (folio)
> > +	if (folio) {
> >  		vmf->page = folio_file_page(folio, vmf->pgoff);
> > +		if (ret == VM_FAULT_LOCKED)
> > +			mmu_notifier_update_mapping(vma->vm_mm, vmf->address,
> > +						    page_to_pfn(vmf->page));
> > +	}
> >  	return ret;
> >  }
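For completeness, here is a rough sketch (not the actual udmabuf patch) of
how a notifier user could subscribe to the proposed update_mapping op; the
my_* names and last_pfn are made up purely for illustration:

#include <linux/mmu_notifier.h>

struct my_ctx {
	struct mmu_notifier notifier;
	unsigned long last_pfn;		/* example state updated by the callback */
};

static void my_update_mapping(struct mmu_notifier *mn, struct mm_struct *mm,
			      unsigned long address, unsigned long pfn)
{
	struct my_ctx *ctx = container_of(mn, struct my_ctx, notifier);

	/*
	 * With the change proposed above this runs under the huge PTE lock
	 * (an equivalent lock would be needed in the shmem path), so pfn is
	 * stable and pfn_to_page(pfn) is safe for the duration of the call.
	 */
	ctx->last_pfn = pfn;
}

static const struct mmu_notifier_ops my_ops = {
	.update_mapping	= my_update_mapping,
};

static int my_register(struct my_ctx *ctx, struct mm_struct *mm)
{
	ctx->notifier.ops = &my_ops;
	/* mmu_notifier_register() takes mmap_lock internally */
	return mmu_notifier_register(&ctx->notifier, mm);
}

If the callback always runs under the PTL as proposed above, the
driver-side locking can presumably stay minimal, which ties in with the
point about the current udmabuf locking looking excessive.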