The existing lock_vma_under_rcu() API assumes an exact VMA match, so
it's not a 100% equivalent of find_vma(). There are use cases that do
want find_vma() semantics of finding an exact VMA or the next one.
It's also important for such an API to let the user distinguish between
failing to take the per-VMA lock and there being no VMA at or after the
provided address.

As such, this patch adds a new find_vma()-like API,
find_and_lock_vma_rcu(), which finds the exact or next VMA, attempts to
take the per-VMA lock, and, if that fails, returns ERR_PTR(-EBUSY). It
still returns NULL if there is no VMA at or after the address. In the
successful case it returns a valid, non-isolated VMA with the VMA lock
taken.

This API will be used in a subsequent patch in this patch set to
implement a new user-facing API for querying process VMAs.

Cc: Mike Rapoport <rppt@xxxxxxxxxx>
Cc: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Cc: Liam Howlett <liam.howlett@xxxxxxxxxx>
Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
---
 include/linux/mm.h |  8 ++++++
 mm/memory.c        | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9849dfda44d4..a6846401da77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -776,6 +776,8 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
 	mmap_assert_locked(vmf->vma->vm_mm);
 }
 
+struct vm_area_struct *find_and_lock_vma_rcu(struct mm_struct *mm,
+					     unsigned long address);
 struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 					  unsigned long address);
 
@@ -790,6 +792,12 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 static inline void vma_mark_detached(struct vm_area_struct *vma,
 				     bool detached) {}
 
+static inline struct vm_area_struct *find_and_lock_vma_rcu(struct mm_struct *mm,
+							    unsigned long address)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 		unsigned long address)
 {
diff --git a/mm/memory.c b/mm/memory.c
index b5453b86ec4b..9d0413e98d8b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5824,6 +5824,68 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
 #endif
 
 #ifdef CONFIG_PER_VMA_LOCK
+/*
+ * find_and_lock_vma_rcu() - Find and lock the VMA for a given address, or the
+ * next VMA. Search is done under RCU protection, without taking or assuming
+ * mmap_lock. Returned VMA is guaranteed to be stable and not isolated.
+ *
+ * @mm: The mm_struct to check
+ * @address: The address
+ *
+ * Returns: The VMA associated with @address, or the next VMA.
+ * May return %NULL in the case of no VMA at @address or above.
+ * If the VMA is being modified and can't be locked, ERR_PTR(-EBUSY) is returned.
+ */
+struct vm_area_struct *find_and_lock_vma_rcu(struct mm_struct *mm,
+					     unsigned long address)
+{
+	MA_STATE(mas, &mm->mm_mt, address, address);
+	struct vm_area_struct *vma;
+	int err;
+
+	rcu_read_lock();
+retry:
+	vma = mas_find(&mas, ULONG_MAX);
+	if (!vma) {
+		err = 0; /* no VMA, return NULL */
+		goto inval;
+	}
+
+	if (!vma_start_read(vma)) {
+		err = -EBUSY;
+		goto inval;
+	}
+
+	/*
+	 * Check since vm_start/vm_end might change before we lock the VMA.
+	 * Note that, unlike lock_vma_under_rcu(), we are searching for the
+	 * VMA covering the address or the next one, so we only make sure
+	 * the VMA wasn't updated to end before the address.
+	 */
+	if (unlikely(vma->vm_end <= address)) {
+		err = -EBUSY;
+		goto inval_end_read;
+	}
+
+	/* Check if the VMA got isolated after we found it */
+	if (vma->detached) {
+		vma_end_read(vma);
+		count_vm_vma_lock_event(VMA_LOCK_MISS);
+		/* The area was replaced with another one */
+		goto retry;
+	}
+
+	rcu_read_unlock();
+	return vma;
+
+inval_end_read:
+	vma_end_read(vma);
+inval:
+	rcu_read_unlock();
+	count_vm_vma_lock_event(VMA_LOCK_ABORT);
+	return ERR_PTR(err);
+}
+
 /*
  * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
  * stable and not isolated. If the VMA is not found or is being modified the
-- 
2.43.0
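
P.S. For reviewers, here is a rough caller-side sketch of the intended
usage. It is illustrative only: vma_query_example() is a made-up helper
(not the user-facing API added later in this series), and retrying under
mmap_lock on -EBUSY is just one possible policy.

/* Illustrative sketch only -- not part of this patch. */
static int vma_query_example(struct mm_struct *mm, unsigned long addr,
			     unsigned long *start, unsigned long *end)
{
	struct vm_area_struct *vma;

	vma = find_and_lock_vma_rcu(mm, addr);
	if (!vma)
		return -ENOENT;		/* no VMA at or after addr */
	if (IS_ERR(vma))
		return PTR_ERR(vma);	/* -EBUSY: per-VMA lock contended;
					 * caller may retry under
					 * mmap_read_lock()
					 */

	/* vma is valid, not isolated, and VMA-read-locked here */
	*start = vma->vm_start;
	*end = vma->vm_end;

	vma_end_read(vma);		/* drop the per-VMA lock */
	return 0;
}

The ERR_PTR(-EBUSY) return is what lets a caller tell lock contention
apart from the no-VMA case, which lock_vma_under_rcu()'s NULL-only
contract cannot express.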