On Sun, 17 Jul 2022 02:46:32 +0000 Liam Howlett <liam.howlett@xxxxxxxxxx> wrote: > This is the v10 + fixes and a few clean ups. I'm seeing quite a lot of differences between this and what we had in mm-unstable. A surprising amount. Please check it all? --- include/linux/maple_tree.h 2022-07-16 21:05:04.697041964 -0700 +++ ../25-pre-maple/include/linux/maple_tree.h 2022-07-16 21:05:34.509477799 -0700 @@ -225,9 +225,9 @@ * @flags: The maple tree flags * */ -#define MTREE_INIT(name, __flags) { \ - .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ - .ma_flags = __flags, \ +#define MTREE_INIT(name, flags) { \ + .ma_lock = __SPIN_LOCK_UNLOCKED(name.ma_lock), \ + .ma_flags = flags, \ .ma_root = NULL, \ } @@ -238,13 +238,13 @@ * @lock: The external lock */ #ifdef CONFIG_LOCKDEP -#define MTREE_INIT_EXT(name, __flags, __lock) { \ - .ma_external_lock = &(__lock).dep_map, \ - .ma_flags = (__flags), \ +#define MTREE_INIT_EXT(name, flags, lock) { \ + .ma_external_lock = &(lock).dep_map, \ + .ma_flags = flags, \ .ma_root = NULL, \ } #else -#define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) +#define MTREE_INIT_EXT(name, flags, lock) MTREE_INIT(name, flags) #endif #define DEFINE_MTREE(name) \ @@ -520,8 +520,8 @@ * Note: may return the zero entry. * */ -#define mas_for_each(__mas, __entry, __max) \ - while (((__entry) = mas_find((__mas), (__max))) != NULL) +#define mas_for_each(mas, entry, max) \ + while (((entry) = mas_find((mas), (max))) != NULL) /** @@ -654,9 +654,9 @@ * * Note: Will not return the zero entry. */ -#define mt_for_each(__tree, __entry, __index, __max) \ - for (__entry = mt_find(__tree, &(__index), __max); \ - __entry; __entry = mt_find_after(__tree, &(__index), __max)) +#define mt_for_each(tree, entry, index, max) \ + for (entry = mt_find(tree, &(index), max); \ + entry; entry = mt_find_after(tree, &(index), max)) #ifdef CONFIG_DEBUG_MAPLE_TREE @@ -665,12 +665,12 @@ void mt_dump(const struct maple_tree *mt); void mt_validate(struct maple_tree *mt); -#define MT_BUG_ON(__tree, __x) do { \ +#define MT_BUG_ON(tree, x) do { \ atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ + if (x) { \ pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree); \ + __func__, __LINE__, x); \ + mt_dump(tree); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ @@ -680,7 +680,7 @@ } \ } while (0) #else -#define MT_BUG_ON(__tree, __x) BUG_ON(__x) +#define MT_BUG_ON(tree, x) BUG_ON(x) #endif /* CONFIG_DEBUG_MAPLE_TREE */ #endif /*_LINUX_MAPLE_TREE_H */ --- include/linux/mm.h 2022-07-16 21:05:07.625084770 -0700 +++ ../25-pre-maple/include/linux/mm.h 2022-07-16 21:05:37.847526599 -0700 @@ -683,12 +683,11 @@ return vmi->mas.index; } -#define for_each_vma(__vmi, __vma) \ - while (((__vma) = vma_next(&(__vmi))) != NULL) +#define for_each_vma(vmi, vma) while ((vma = vma_next(&(vmi))) != NULL) /* The MM code likes to work with exclusive end addresses */ -#define for_each_vma_range(__vmi, __vma, __end) \ - while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL) +#define for_each_vma_range(vmi, vma, end) \ + while ((vma = vma_find(&(vmi), (end) - 1)) != NULL) #ifdef CONFIG_SHMEM /* --- include/linux/mm_types.h 2022-07-16 21:05:07.625084770 -0700 +++ ../25-pre-maple/include/linux/mm_types.h 2022-07-16 21:05:37.848526614 -0700 @@ -681,11 +681,11 @@ struct ma_state mas; }; -#define VMA_ITERATOR(name, __mm, __addr) \ +#define VMA_ITERATOR(name, mm, addr) \ struct vma_iterator name = { \ .mas = { \ - .tree = &(__mm)->mm_mt, \ - .index = __addr, \ + .tree = &mm->mm_mt, \ + .index = addr, \ .node = MAS_START, \ }, \ } --- mm/memory.c 2022-07-16 21:05:07.627084799 -0700 +++ ../25-pre-maple/mm/memory.c 2022-07-16 21:05:37.980528543 -0700 @@ -1699,6 +1699,7 @@ /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather + * @mt: The maple tree pointer for the VMAs * @vma: the starting vma * @start_addr: virtual address at which to start unmapping * @end_addr: virtual address at which to end unmapping --- mm/mmap.c 2022-07-16 21:05:07.716086100 -0700 +++ ../25-pre-maple/mm/mmap.c 2022-07-16 21:05:38.104530356 -0700 @@ -341,27 +341,28 @@ MA_STATE(mas, mt, 0, 0); mt_validate(&mm->mm_mt); + mas_for_each(&mas, vma_mt, ULONG_MAX) { if ((vma_mt->vm_start != mas.index) || (vma_mt->vm_end - 1 != mas.last)) { pr_emerg("issue in %s\n", current->comm); dump_stack(); dump_vma(vma_mt); - pr_emerg("mt piv: %p %lu - %lu\n", vma_mt, + pr_emerg("mt piv: %px %lu - %lu\n", vma_mt, mas.index, mas.last); - pr_emerg("mt vma: %p %lu - %lu\n", vma_mt, + pr_emerg("mt vma: %px %lu - %lu\n", vma_mt, vma_mt->vm_start, vma_mt->vm_end); mt_dump(mas.tree); if (vma_mt->vm_end != mas.last + 1) { - pr_err("vma: %p vma_mt %lu-%lu\tmt %lu-%lu\n", + pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n", mm, vma_mt->vm_start, vma_mt->vm_end, mas.index, mas.last); mt_dump(mas.tree); } VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm); if (vma_mt->vm_start != mas.index) { - pr_err("vma: %p vma_mt %p %lu - %lu doesn't match\n", + pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n", mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end); mt_dump(mas.tree); } @@ -448,7 +449,7 @@ unsigned long vm_start = max(addr, vma->vm_start); unsigned long vm_end = min(end, vma->vm_end); - nr_pages += PHYS_PFN(vm_end - vm_start); + nr_pages += (vm_end - vm_start) / PAGE_SIZE; } return nr_pages; @@ -710,11 +711,11 @@ * remove_next == 1 is case 1 or 7. */ remove_next = 1 + (end > next->vm_end); - if (remove_next == 2) - next_next = find_vma(mm, next->vm_end); - + next_next = find_vma(mm, next->vm_end); VM_WARN_ON(remove_next == 2 && end != next_next->vm_end); + /* trim end to next, for case 6 first pass */ + end = next->vm_end; } exporter = next; @@ -725,7 +726,7 @@ * next, if the vma overlaps with it. */ if (remove_next == 2 && !next->anon_vma) - exporter = next_next; + exporter = find_vma(mm, next->vm_end); } else if (end > next->vm_start) { /* @@ -762,6 +763,7 @@ return error; } } +again: vma_adjust_trans_huge(orig_vma, start, end, adjust_next); if (mas_preallocate(&mas, vma, GFP_KERNEL)) { @@ -852,8 +854,6 @@ if (remove_next && file) { __remove_shared_vm_struct(next, file, mapping); - if (remove_next == 2) - __remove_shared_vm_struct(next_next, file, mapping); } else if (insert) { /* * split_vma has split insert from vma, and needs @@ -881,7 +881,6 @@ } if (remove_next) { -again: if (file) { uprobe_munmap(next, next->vm_start, next->vm_end); fput(file); @@ -890,22 +889,45 @@ anon_vma_merge(vma, next); mm->map_count--; mpol_put(vma_policy(next)); - if (remove_next != 2) - BUG_ON(vma->vm_end < next->vm_end); + BUG_ON(vma->vm_end < next->vm_end); vm_area_free(next); /* * In mprotect's case 6 (see comments on vma_merge), - * we must remove next_next too. + * we must remove another next too. It would clutter + * up the code too much to do both in one go. */ + if (remove_next != 3) { + /* + * If "next" was removed and vma->vm_end was + * expanded (up) over it, in turn + * "next->prev->vm_end" changed and the + * "vma->next" gap must be updated. + */ + next = next_next; + } else { + /* + * For the scope of the comment "next" and + * "vma" considered pre-swap(): if "vma" was + * removed, next->vm_start was expanded (down) + * over it and the "next" gap must be updated. + * Because of the swap() the post-swap() "vma" + * actually points to pre-swap() "next" + * (post-swap() "next" as opposed is now a + * dangling pointer). + */ + next = vma; + } if (remove_next == 2) { + mas_reset(&mas); remove_next = 1; - next = next_next; + end = next->vm_end; goto again; } } - if (insert && file) + if (insert && file) { uprobe_mmap(insert); + } mas_destroy(&mas); validate_mm(mm); @@ -1613,8 +1635,8 @@ return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; } -/** - * unmapped_area() - Find an area between the low_limit and the high_limit with +/* + * unmapped_area() Find an area between the low_limit and the high_limit with * the correct alignment and offset, all from @info. Note: current->mm is used * for the search. * @@ -1640,15 +1662,12 @@ gap = mas.index; gap += (info->align_offset - gap) & info->align_mask; - VM_BUG_ON(gap + info->length > info->high_limit); - VM_BUG_ON(gap + info->length > mas.last); return gap; } -/** - * unmapped_area_topdown() - Find an area between the low_limit and the +/* unmapped_area_topdown() Find an area between the low_limit and the * high_limit with * the correct alignment and offset at the highest available - * address, all from @info. Note: current->mm is used for the search. + * address, all from * @info. Note: current->mm is used for the search. * * @info: The unmapped area information including the range (low_limit - * hight_limit), the alignment offset and mask. @@ -1671,8 +1690,6 @@ gap = mas.last + 1 - info->length; gap -= (gap - info->align_offset) & info->align_mask; - VM_BUG_ON(gap < info->low_limit); - VM_BUG_ON(gap < mas.index); return gap; } @@ -1884,12 +1901,12 @@ EXPORT_SYMBOL(find_vma_intersection); /** - * find_vma() - Find the VMA for a given address, or the next VMA. + * find_vma() - Find the VMA for a given address, or the next vma. * @mm: The mm_struct to check * @addr: The address * - * Returns: The VMA associated with addr, or the next VMA. - * May return %NULL in the case of no VMA at addr or above. + * Returns: The VMA associated with addr, or the next vma. + * May return %NULL in the case of no vma at addr or above. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { @@ -2642,7 +2659,7 @@ (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, pgoff, vma->vm_userfaultfd_ctx, NULL) : can_vma_merge_after(prev, vm_flags, NULL, file, pgoff, - NULL_VM_UFFD_CTX, NULL))) { + NULL_VM_UFFD_CTX , NULL))) { merge_start = prev->vm_start; vma = prev; vm_pgoff = prev->vm_pgoff; @@ -3036,7 +3053,6 @@ unsigned long addr, unsigned long len, unsigned long flags) { struct mm_struct *mm = current->mm; - validate_mm_mt(mm); /* @@ -3092,7 +3108,7 @@ vma->vm_flags = flags; vma->vm_page_prot = vm_get_page_prot(flags); mas_set_range(mas, vma->vm_start, addr + len - 1); - if (mas_store_gfp(mas, vma, GFP_KERNEL)) + if ( mas_store_gfp(mas, vma, GFP_KERNEL)) goto mas_store_fail; mm->map_count++; @@ -3155,7 +3171,7 @@ vma = mas_prev(&mas, 0); if (!vma || vma->vm_end != addr || vma_policy(vma) || !can_vma_merge_after(vma, flags, NULL, NULL, - addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) + addr >> PAGE_SHIFT,NULL_VM_UFFD_CTX, NULL)) vma = NULL; ret = do_brk_flags(&mas, vma, addr, len, flags);