Android reported a regression in the userfaultfd unmap path. A closer inspection on the userfaultfd_unmap_prep() change showed that a second tree walk would be necessary in the reworked code. Fix the regression by passing each VMA that will be unmapped through to the userfaultfd_unmap_prep() function as they are added to the unmap list, instead of re-walking the tree for the VMA. Fixes: 69dbe6daf104 ("userfaultfd: use maple tree iterator to iterate VMAs") Reported-by: Suren Baghdasaryan <surenb@xxxxxxxxxx> Suggested-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> --- fs/userfaultfd.c | 35 +++++++++++++++-------------------- include/linux/userfaultfd_k.h | 6 +++--- mm/mmap.c | 31 +++++++++++++++---------------- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 4e800bb7d2ab..a2a42a02848f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -857,31 +857,26 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, return false; } -int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, +int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *unmaps) { - VMA_ITERATOR(vmi, mm, start); - struct vm_area_struct *vma; - - for_each_vma_range(vmi, vma, end) { - struct userfaultfd_unmap_ctx *unmap_ctx; - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; + struct userfaultfd_unmap_ctx *unmap_ctx; + struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; - if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || - has_unmap_ctx(ctx, unmaps, start, end)) - continue; + if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || + has_unmap_ctx(ctx, unmaps, start, end)) + return 0; - unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL); - if (!unmap_ctx) - return -ENOMEM; + unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL); + if (!unmap_ctx) + return -ENOMEM; - userfaultfd_ctx_get(ctx); - atomic_inc(&ctx->mmap_changing); - unmap_ctx->ctx = ctx; - unmap_ctx->start = start; - unmap_ctx->end = end; - list_add_tail(&unmap_ctx->list, unmaps); - } + userfaultfd_ctx_get(ctx); + atomic_inc(&ctx->mmap_changing); + unmap_ctx->ctx = ctx; + unmap_ctx->start = start; + unmap_ctx->end = end; + list_add_tail(&unmap_ctx->list, unmaps); return 0; } diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index d78b01524349..ac7b0c96d351 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -188,8 +188,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); -extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, - unsigned long end, struct list_head *uf); +extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, struct list_head *uf); extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); @@ -271,7 +271,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma, return true; } -static inline int userfaultfd_unmap_prep(struct mm_struct *mm, +static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf) { diff --git a/mm/mmap.c b/mm/mmap.c index 99dc60f64816..1503a7bdb192 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2420,6 +2420,21 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, goto munmap_sidetree_failed; count++; + if (unlikely(uf)) { + /* + * If userfaultfd_unmap_prep returns an error the vmas + * will remain split, but userland will get a + * highly unexpected error anyway. This is no + * different than the case where the first of the two + * __split_vma fails, but we don't undo the first + * split, despite we could. This is unlikely enough + * failure that it's not worth optimizing it for. + */ + error = userfaultfd_unmap_prep(next, start, end, uf); + + if (error) + goto userfaultfd_error; + } #ifdef CONFIG_DEBUG_VM_MAPLE_TREE BUG_ON(next->vm_start < start); BUG_ON(next->vm_start > end); @@ -2432,22 +2447,6 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, if (!next) next = vma_next(vmi); - if (unlikely(uf)) { - /* - * If userfaultfd_unmap_prep returns an error the vmas - * will remain split, but userland will get a - * highly unexpected error anyway. This is no - * different than the case where the first of the two - * __split_vma fails, but we don't undo the first - * split, despite we could. This is unlikely enough - * failure that it's not worth optimizing it for. - */ - error = userfaultfd_unmap_prep(mm, start, end, uf); - - if (error) - goto userfaultfd_error; - } - #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) /* Make sure no VMAs are about to be lost. */ { -- 2.39.2