On Fri, May 31, 2024 at 9:33 AM Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> wrote: > > Split the munmap function into a gathering of vmas and a cleanup of the > gathered vmas. This is necessary for the later patches in the series. > > Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> The refactoring looks correct but it's quite painful to verify all the pieces. Not sure if it could have been refactored in more gradual steps... Reviewed-by: Suren Baghdasaryan <surenb@xxxxxxxxxx> > --- > mm/mmap.c | 143 ++++++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 101 insertions(+), 42 deletions(-) > > diff --git a/mm/mmap.c b/mm/mmap.c > index 31d464e6a656..fad40d604c64 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -2340,6 +2340,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) > > if (vma->vm_flags & VM_ACCOUNT) > nr_accounted += nrpages; > + nit: here and below a couple of unnecessary empty lines. > vm_stat_account(mm, vma->vm_flags, -nrpages); > remove_vma(vma, false); > } > @@ -2545,33 +2546,45 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, > vma->vm_userfaultfd_ctx, anon_vma_name(vma)); > } > > + > +static inline void abort_munmap_vmas(struct ma_state *mas_detach) > +{ > + struct vm_area_struct *vma; > + int limit; > + > + limit = mas_detach->index; > + mas_set(mas_detach, 0); > + /* Re-attach any detached VMAs */ > + mas_for_each(mas_detach, vma, limit) > + vma_mark_detached(vma, false); > + > + __mt_destroy(mas_detach->tree); > +} > + > /* > - * do_vmi_align_munmap() - munmap the aligned region from @start to @end. > + * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree > + * for removal at a later date. Handles splitting first and last if necessary > + * and marking the vmas as isolated. > + * > * @vmi: The vma iterator > * @vma: The starting vm_area_struct > * @mm: The mm_struct > * @start: The aligned start address to munmap. > * @end: The aligned end address to munmap. > * @uf: The userfaultfd list_head > - * @unlock: Set to true to drop the mmap_lock. unlocking only happens on > - * success. > + * @mas_detach: The maple state tracking the detached tree > * > - * Return: 0 on success and drops the lock if so directed, error and leaves the > - * lock held otherwise. > + * Return: 0 on success > */ > static int > -do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > +vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, > struct mm_struct *mm, unsigned long start, > - unsigned long end, struct list_head *uf, bool unlock) > + unsigned long end, struct list_head *uf, > + struct ma_state *mas_detach, unsigned long *locked_vm) > { > - struct vm_area_struct *prev, *next = NULL; > - struct maple_tree mt_detach; > - int count = 0; > + struct vm_area_struct *next = NULL; > int error = -ENOMEM; > - unsigned long locked_vm = 0; > - MA_STATE(mas_detach, &mt_detach, 0, 0); > - mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); > - mt_on_stack(mt_detach); > + int count = 0; > > /* > * If we need to split any vma, do it now to save pain later. > @@ -2610,15 +2623,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > goto end_split_failed; > } > vma_start_write(next); > - mas_set(&mas_detach, count); > - error = mas_store_gfp(&mas_detach, next, GFP_KERNEL); > + mas_set(mas_detach, count++); > + if (next->vm_flags & VM_LOCKED) > + *locked_vm += vma_pages(next); > + > + error = mas_store_gfp(mas_detach, next, GFP_KERNEL); > if (error) > goto munmap_gather_failed; > vma_mark_detached(next, true); > - if (next->vm_flags & VM_LOCKED) > - locked_vm += vma_pages(next); > - > - count++; > if (unlikely(uf)) { > /* > * If userfaultfd_unmap_prep returns an error the vmas > @@ -2643,7 +2655,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) > /* Make sure no VMAs are about to be lost. */ > { > - MA_STATE(test, &mt_detach, 0, 0); > + MA_STATE(test, mas_detach->tree, 0, 0); > struct vm_area_struct *vma_mas, *vma_test; > int test_count = 0; > > @@ -2663,13 +2675,29 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > while (vma_iter_addr(vmi) > start) > vma_iter_prev_range(vmi); > > - error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); > - if (error) > - goto clear_tree_failed; > + return 0; > > - /* Point of no return */ > - mm->locked_vm -= locked_vm; > +userfaultfd_error: > +munmap_gather_failed: > +end_split_failed: > + abort_munmap_vmas(mas_detach); > +start_split_failed: > +map_count_exceeded: > + return error; > +} > + > +static void > +vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, > + struct mm_struct *mm, unsigned long start, > + unsigned long end, bool unlock, struct ma_state *mas_detach, > + unsigned long locked_vm) > +{ > + struct vm_area_struct *prev, *next; > + int count; > + > + count = mas_detach->index + 1; > mm->map_count -= count; > + mm->locked_vm -= locked_vm; > if (unlock) > mmap_write_downgrade(mm); > > @@ -2682,30 +2710,61 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > * We can free page tables without write-locking mmap_lock because VMAs > * were isolated before we downgraded mmap_lock. > */ > - mas_set(&mas_detach, 1); > - unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, > + mas_set(mas_detach, 1); > + unmap_region(mm, mas_detach, vma, prev, next, start, end, count, > !unlock); > /* Statistics and freeing VMAs */ > - mas_set(&mas_detach, 0); > - remove_mt(mm, &mas_detach); > + mas_set(mas_detach, 0); > + remove_mt(mm, mas_detach); > validate_mm(mm); > if (unlock) > mmap_read_unlock(mm); > > - __mt_destroy(&mt_detach); > - return 0; > + __mt_destroy(mas_detach->tree); > +} > > -clear_tree_failed: > -userfaultfd_error: > -munmap_gather_failed: > -end_split_failed: > - mas_set(&mas_detach, 0); > - mas_for_each(&mas_detach, next, end) > - vma_mark_detached(next, false); > +/* > + * do_vmi_align_munmap() - munmap the aligned region from @start to @end. > + * @vmi: The vma iterator > + * @vma: The starting vm_area_struct > + * @mm: The mm_struct > + * @start: The aligned start address to munmap. > + * @end: The aligned end address to munmap. > + * @uf: The userfaultfd list_head > + * @unlock: Set to true to drop the mmap_lock. unlocking only happens on > + * success. > + * > + * Return: 0 on success and drops the lock if so directed, error and leaves the > + * lock held otherwise. > + */ > +static int > +do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > + struct mm_struct *mm, unsigned long start, > + unsigned long end, struct list_head *uf, bool unlock) > +{ > + struct maple_tree mt_detach; > + MA_STATE(mas_detach, &mt_detach, 0, 0); > + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); > + mt_on_stack(mt_detach); > + int error; > + unsigned long locked_vm = 0; > > - __mt_destroy(&mt_detach); > -start_split_failed: > -map_count_exceeded: > + error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf, > + &mas_detach, &locked_vm); > + if (error) > + goto gather_failed; > + > + error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); > + if (error) > + goto clear_area_failed; > + > + vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach, > + locked_vm); > + return 0; > + > +clear_area_failed: > + abort_munmap_vmas(&mas_detach); > +gather_failed: > validate_mm(mm); > return error; > } > -- > 2.43.0 >