On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> wrote: > > It is usually safe to call wp_page_copy() under the VMA lock. The only > unsafe situation is when no anon_vma has been allocated for this VMA, > and we have to look at adjacent VMAs to determine if their anon_vma can > be shared. Since this happens only for the first COW of a page in this > VMA, the majority of calls to wp_page_copy() do not need to fall back > to the mmap_sem. > > Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which > will return RETRY if we currently hold the VMA lock and need to allocate > an anon_vma. This lets us drop the check in do_wp_page(). > > Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> > --- > mm/memory.c | 39 ++++++++++++++++++++++++++------------- > 1 file changed, 26 insertions(+), 13 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 97f860d6cd2a..cff78c496728 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf) > count_vm_event(PGREUSE); > } > > +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + > + if (likely(vma->anon_vma)) > + return 0; > + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { I don't think the above condition will happen today because lock_vma_under_rcu() returns NULL and do_page_fault() falls back to taking mmap_lock when !vma->anon_vma (https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428). We would need to narrow down that check in lock_vma_under_rcu() to make this work here. > + vma_end_read(vma); > + return VM_FAULT_RETRY; > + } > + if (__anon_vma_prepare(vma)) > + return VM_FAULT_OOM; > + return 0; > +} > + > /* > * Handle the case of a page which we actually need to copy to a new page, > * either due to COW or unsharing. > @@ -3069,27 +3084,29 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > pte_t entry; > int page_copied = 0; > struct mmu_notifier_range range; > - int ret; > + vm_fault_t ret; > > delayacct_wpcopy_start(); > > if (vmf->page) > old_folio = page_folio(vmf->page); > - if (unlikely(anon_vma_prepare(vma))) > - goto oom; > + ret = vmf_anon_prepare(vmf); > + if (unlikely(ret)) > + goto out; > > if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { > new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address); > if (!new_folio) > goto oom; > } else { > + int err; > new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, > vmf->address, false); > if (!new_folio) > goto oom; > > - ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); > - if (ret) { > + err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); > + if (err) { > /* > * COW failed, if the fault was solved by other, > * it's fine. If not, userspace would re-fault on > @@ -3102,7 +3119,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > folio_put(old_folio); > > delayacct_wpcopy_end(); > - return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; > + return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; > } > kmsan_copy_page_meta(&new_folio->page, vmf->page); > } > @@ -3212,11 +3229,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > oom_free_new: > folio_put(new_folio); > oom: > + ret = VM_FAULT_OOM; > +out: > if (old_folio) > folio_put(old_folio); > > delayacct_wpcopy_end(); > - return VM_FAULT_OOM; > + return ret; > } > > /** > @@ -3458,12 +3477,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > return 0; > } > copy: > - if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) { > - pte_unmap_unlock(vmf->pte, vmf->ptl); > - vma_end_read(vmf->vma); > - return VM_FAULT_RETRY; > - } > - > /* > * Ok, we need to copy. Oh, well.. > */ > -- > 2.40.1 >