* Suren Baghdasaryan <surenb@xxxxxxxxxx> [230801 18:07]:
> Implicit vma locking inside vm_flags_reset() and vm_flags_reset_once() is
> not obvious and makes it hard to understand where vma locking is happening.
> Also, in some cases (like in dup_userfaultfd()) the vma should be locked
> earlier than the vm_flags modification. To make locking more visible,
> change these functions to assert that the vma write lock is taken, and
> explicitly lock the vma beforehand. Fix the userfaultfd functions which
> should lock the vma earlier.
>
> Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>

Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>

> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c    |  1 +
>  drivers/infiniband/hw/hfi1/file_ops.c |  1 +
>  fs/userfaultfd.c                      |  6 ++++++
>  include/linux/mm.h                    | 10 +++++++---
>  mm/madvise.c                          |  5 ++---
>  mm/mlock.c                            |  3 ++-
>  mm/mprotect.c                         |  1 +
>  7 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 709ebd578394..e2d6f9327f77 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -410,6 +410,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
>  			ret = H_STATE;
>  			break;
>  		}
> +		vma_start_write(vma);
>  		/* Copy vm_flags to avoid partial modifications in ksm_madvise */
>  		vm_flags = vma->vm_flags;
>  		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
> index a5ab22cedd41..5920bfc1e1c5 100644
> --- a/drivers/infiniband/hw/hfi1/file_ops.c
> +++ b/drivers/infiniband/hw/hfi1/file_ops.c
> @@ -344,6 +344,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
>  		goto done;
>  	}
>
> +	vma_start_write(vma);
>  	/*
>  	 * vm_pgoff is used as a buffer selector cookie. Always mmap from
>  	 * the beginning.
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 7cecd49e078b..6cde95533dcd 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -667,6 +667,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
>  	mmap_write_lock(mm);
>  	for_each_vma(vmi, vma) {
>  		if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
> +			vma_start_write(vma);
>  			vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  			userfaultfd_set_vm_flags(vma,
>  						 vma->vm_flags & ~__VM_UFFD_FLAGS);
> @@ -702,6 +703,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
>
>  	octx = vma->vm_userfaultfd_ctx.ctx;
>  	if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
> +		vma_start_write(vma);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
>  		return 0;
> @@ -783,6 +785,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
>  		atomic_inc(&ctx->mmap_changing);
>  	} else {
>  		/* Drop uffd context if remap feature not enabled */
> +		vma_start_write(vma);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
>  	}
> @@ -940,6 +943,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
>  			prev = vma;
>  		}
>
> +		vma_start_write(vma);
>  		userfaultfd_set_vm_flags(vma, new_flags);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  	}
> @@ -1502,6 +1506,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
>  		 * the next vma was merged into the current one and
>  		 * the current one has not been updated yet.
>  		 */
> +		vma_start_write(vma);
>  		userfaultfd_set_vm_flags(vma, new_flags);
>  		vma->vm_userfaultfd_ctx.ctx = ctx;
>
> @@ -1685,6 +1690,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
>  		 * the next vma was merged into the current one and
>  		 * the current one has not been updated yet.
>  		 */
> +		vma_start_write(vma);
>  		userfaultfd_set_vm_flags(vma, new_flags);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 262b5f44101d..2c720c9bb1ae 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -780,18 +780,22 @@ static inline void vm_flags_init(struct vm_area_struct *vma,
>  	ACCESS_PRIVATE(vma, __vm_flags) = flags;
>  }
>
> -/* Use when VMA is part of the VMA tree and modifications need coordination */
> +/*
> + * Use when VMA is part of the VMA tree and modifications need coordination
> + * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
> + * it should be locked explicitly beforehand.
> + */
>  static inline void vm_flags_reset(struct vm_area_struct *vma,
>  				  vm_flags_t flags)
>  {
> -	vma_start_write(vma);
> +	vma_assert_write_locked(vma);
>  	vm_flags_init(vma, flags);
>  }
>
>  static inline void vm_flags_reset_once(struct vm_area_struct *vma,
>  					vm_flags_t flags)
>  {
> -	vma_start_write(vma);
> +	vma_assert_write_locked(vma);
>  	WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
>  }
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index bfe0e06427bd..507b1d299fec 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -173,9 +173,8 @@ static int madvise_update_vma(struct vm_area_struct *vma,
>  	}
>
>  success:
> -	/*
> -	 * vm_flags is protected by the mmap_lock held in write mode.
> -	 */
> +	/* vm_flags is protected by the mmap_lock held in write mode. */
> +	vma_start_write(vma);
>  	vm_flags_reset(vma, new_flags);
>  	if (!vma->vm_file || vma_is_anon_shmem(vma)) {
>  		error = replace_anon_vma_name(vma, anon_name);
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 479e09d0994c..06bdfab83b58 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -387,6 +387,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
>  	 */
>  	if (newflags & VM_LOCKED)
>  		newflags |= VM_IO;
> +	vma_start_write(vma);
>  	vm_flags_reset_once(vma, newflags);
>
>  	lru_add_drain();
> @@ -461,9 +462,9 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  	 * It's okay if try_to_unmap_one unmaps a page just after we
>  	 * set VM_LOCKED, populate_vma_page_range will bring it back.
>  	 */
> -
>  	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
>  		/* No work to do, and mlocking twice would be wrong */
> +		vma_start_write(vma);
>  		vm_flags_reset(vma, newflags);
>  	} else {
>  		mlock_vma_pages_range(vma, start, end, newflags);
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 3aef1340533a..362e190a8f81 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -657,6 +657,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
>  	 * vm_flags and vm_page_prot are protected by the mmap_lock
>  	 * held in write mode.
>  	 */
> +	vma_start_write(vma);
>  	vm_flags_reset(vma, newflags);
>  	if (vma_wants_manual_pte_write_upgrade(vma))
>  		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
> -- 
> 2.41.0.585.gd2178a4bd4-goog
>
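
Side note for anyone adding new vm_flags modifications on top of this: the
convention after this patch is that the caller write-locks the vma itself
before calling vm_flags_reset()/vm_flags_reset_once(). A minimal sketch of
what a hypothetical caller now looks like (example_set_flags() is
illustrative only, not part of the patch):

	static void example_set_flags(struct vm_area_struct *vma,
				      vm_flags_t newflags)
	{
		/* The caller is expected to hold mmap_lock in write mode. */
		mmap_assert_write_locked(vma->vm_mm);

		/*
		 * Explicit write lock: excludes page-fault handlers using
		 * per-vma locks and satisfies the vma_assert_write_locked()
		 * check now done inside vm_flags_reset().
		 */
		vma_start_write(vma);
		vm_flags_reset(vma, newflags);
	}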