There are many implementations of ->fault and some of them depend on mmap_lock being held. All vm_ops that implement ->map_pages() end up calling filemap_fault(), which I have audited to be sure it does not rely on mmap_lock. So (for now) key off ->map_pages existing as being a flag to indicate that it's safe to call ->fault while only holding the vma lock. Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> --- mm/memory.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index cff78c496728..0f3da4889230 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf) count_vm_event(PGREUSE); } +/* + * We could add a bitflag somewhere, but for now, we know that all + * vm_ops that have a ->map_pages have been audited and don't need + * the mmap_lock to be held. + */ +static inline vm_fault_t vmf_maybe_unlock_vma(const struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + + if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) + return 0; + vma_end_read(vma); + return VM_FAULT_RETRY; +} + static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -4669,10 +4684,9 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) vm_fault_t ret, tmp; struct folio *folio; - if (vmf->flags & FAULT_FLAG_VMA_LOCK) { - vma_end_read(vma); - return VM_FAULT_RETRY; - } + ret = vmf_maybe_unlock_vma(vmf); + if (ret) + return ret; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) -- 2.40.1