The quilt patch titled Subject: mm: move vma_shrink(), vma_expand() to internal header has been removed from the -mm tree. Its filename was mm-move-vma_shrink-vma_expand-to-internal-header.patch. This patch was dropped because it was merged into the mm-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> Subject: mm: move vma_shrink(), vma_expand() to internal header Date: Mon, 29 Jul 2024 12:50:37 +0100 The vma_shrink() and vma_expand() functions are internal VMA manipulation functions which we ought to abstract away from use outside of memory management code. To achieve this, we replace shift_arg_pages() in fs/exec.c with an invocation of a new relocate_vma_down() function implemented in mm/mmap.c, which enables us to also move move_page_tables() and vma_iter_prev_range() to internal.h. The purpose of doing this is to isolate key VMA manipulation functions in order that we can both abstract them and later render them easily testable. Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@xxxxxxxxxx Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> Reviewed-by: Vlastimil Babka <vbabka@xxxxxxx> Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Brendan Higgins <brendanhiggins@xxxxxxxxxx> Cc: Christian Brauner <brauner@xxxxxxxxxx> Cc: David Gow <davidgow@xxxxxxxxxx> Cc: Eric W. 
Biederman <ebiederm@xxxxxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Cc: Kees Cook <kees@xxxxxxxxxx> Cc: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Cc: Rae Moar <rmoar@xxxxxxxxxx> Cc: SeongJae Park <sj@xxxxxxxxxx> Cc: Shuah Khan <shuah@xxxxxxxxxx> Cc: Suren Baghdasaryan <surenb@xxxxxxxxxx> Cc: Pengfei Xu <pengfei.xu@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/exec.c | 81 +++---------------------------------------- include/linux/mm.h | 17 --------- mm/internal.h | 18 +++++++++ mm/mmap.c | 81 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 91 deletions(-) --- a/fs/exec.c~mm-move-vma_shrink-vma_expand-to-internal-header +++ a/fs/exec.c @@ -712,80 +712,6 @@ static int copy_strings_kernel(int argc, #ifdef CONFIG_MMU /* - * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once - * the binfmt code determines where the new stack should reside, we shift it to - * its final location. The process proceeds as follows: - * - * 1) Use shift to calculate the new vma endpoints. - * 2) Extend vma to cover both the old and new ranges. This ensures the - * arguments passed to subsequent functions are consistent. - * 3) Move vma's page tables to the new range. - * 4) Free up any cleared pgd range. - * 5) Shrink the vma to cover only the new range. 
- */ -static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long old_start = vma->vm_start; - unsigned long old_end = vma->vm_end; - unsigned long length = old_end - old_start; - unsigned long new_start = old_start - shift; - unsigned long new_end = old_end - shift; - VMA_ITERATOR(vmi, mm, new_start); - struct vm_area_struct *next; - struct mmu_gather tlb; - - BUG_ON(new_start > new_end); - - /* - * ensure there are no vmas between where we want to go - * and where we are - */ - if (vma != vma_next(&vmi)) - return -EFAULT; - - vma_iter_prev_range(&vmi); - /* - * cover the whole range: [new_start, old_end) - */ - if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) - return -ENOMEM; - - /* - * move the page tables downwards, on failure we rely on - * process cleanup to remove whatever mess we made. - */ - if (length != move_page_tables(vma, old_start, - vma, new_start, length, false, true)) - return -ENOMEM; - - lru_add_drain(); - tlb_gather_mmu(&tlb, mm); - next = vma_next(&vmi); - if (new_end > old_start) { - /* - * when the old and new regions overlap clear from new_end. - */ - free_pgd_range(&tlb, new_end, old_end, new_end, - next ? next->vm_start : USER_PGTABLES_CEILING); - } else { - /* - * otherwise, clean from old_start; this is done to not touch - * the address space in [new_end, old_start) some architectures - * have constraints on va-space that make this illegal (IA64) - - * for the others its just a little faster. - */ - free_pgd_range(&tlb, old_start, old_end, new_end, - next ? next->vm_start : USER_PGTABLES_CEILING); - } - tlb_finish_mmu(&tlb); - - vma_prev(&vmi); - /* Shrink the vma to just the new range */ - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); -} - -/* * Finalizes the stack vm_area_struct. The flags and permissions are updated, * the stack is optionally relocated, and some extra space is added. 
*/ @@ -877,7 +803,12 @@ int setup_arg_pages(struct linux_binprm /* Move stack pages down in memory. */ if (stack_shift) { - ret = shift_arg_pages(vma, stack_shift); + /* + * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once + * the binfmt code determines where the new stack should reside, we shift it to + * its final location. + */ + ret = relocate_vma_down(vma, stack_shift); if (ret) goto out_unlock; } --- a/include/linux/mm.h~mm-move-vma_shrink-vma_expand-to-internal-header +++ a/include/linux/mm.h @@ -1005,12 +1005,6 @@ static inline struct vm_area_struct *vma return mas_prev(&vmi->mas, 0); } -static inline -struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) -{ - return mas_prev_range(&vmi->mas, 0); -} - static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) { return vmi->mas.index; @@ -2520,11 +2514,6 @@ int set_page_dirty_lock(struct page *pag int get_cmdline(struct task_struct *task, char *buffer, int buflen); -extern unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len, - bool need_rmap_locks, bool for_stack); - /* * Flags used by change_protection(). For now we make it a bitmap so * that we can pass in multiple flags just like parameters. 
However @@ -3267,11 +3256,6 @@ void anon_vma_interval_tree_verify(struc /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff, - struct vm_area_struct *next); -extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); @@ -3279,6 +3263,7 @@ extern struct vm_area_struct *copy_vma(s unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); static inline int check_data_rlimit(unsigned long rlim, unsigned long new, --- a/mm/internal.h~mm-move-vma_shrink-vma_expand-to-internal-header +++ a/mm/internal.h @@ -1305,6 +1305,12 @@ static inline struct vm_area_struct vma_policy(vma), new_ctx, anon_vma_name(vma)); } +int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, pgoff_t pgoff, + struct vm_area_struct *next); +int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, pgoff_t pgoff); + enum { /* mark page accessed */ FOLL_TOUCH = 1 << 16, @@ -1528,6 +1534,12 @@ static inline int vma_iter_store_gfp(str return 0; } +static inline +struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) +{ + return mas_prev_range(&vmi->mas, 0); +} + /* * VMA lock generalization */ @@ -1639,4 +1651,10 @@ void unlink_file_vma_batch_init(struct u void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *); void unlink_file_vma_batch_final(struct unlink_vma_file_batch 
*); +/* mremap.c */ +unsigned long move_page_tables(struct vm_area_struct *vma, + unsigned long old_addr, struct vm_area_struct *new_vma, + unsigned long new_addr, unsigned long len, + bool need_rmap_locks, bool for_stack); + #endif /* __MM_INTERNAL_H */ --- a/mm/mmap.c~mm-move-vma_shrink-vma_expand-to-internal-header +++ a/mm/mmap.c @@ -4088,3 +4088,84 @@ static int __meminit init_reserve_notifi return 0; } subsys_initcall(init_reserve_notifier); + +/* + * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between + * this VMA and its relocated range, which will now reside at [vma->vm_start - + * shift, vma->vm_end - shift). + * + * This function is almost certainly NOT what you want for anything other than + * early executable temporary stack relocation. + */ +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) +{ + /* + * The process proceeds as follows: + * + * 1) Use shift to calculate the new vma endpoints. + * 2) Extend vma to cover both the old and new ranges. This ensures the + * arguments passed to subsequent functions are consistent. + * 3) Move vma's page tables to the new range. + * 4) Free up any cleared pgd range. + * 5) Shrink the vma to cover only the new range. 
+ */ + + struct mm_struct *mm = vma->vm_mm; + unsigned long old_start = vma->vm_start; + unsigned long old_end = vma->vm_end; + unsigned long length = old_end - old_start; + unsigned long new_start = old_start - shift; + unsigned long new_end = old_end - shift; + VMA_ITERATOR(vmi, mm, new_start); + struct vm_area_struct *next; + struct mmu_gather tlb; + + BUG_ON(new_start > new_end); + + /* + * ensure there are no vmas between where we want to go + * and where we are + */ + if (vma != vma_next(&vmi)) + return -EFAULT; + + vma_iter_prev_range(&vmi); + /* + * cover the whole range: [new_start, old_end) + */ + if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) + return -ENOMEM; + + /* + * move the page tables downwards, on failure we rely on + * process cleanup to remove whatever mess we made. + */ + if (length != move_page_tables(vma, old_start, + vma, new_start, length, false, true)) + return -ENOMEM; + + lru_add_drain(); + tlb_gather_mmu(&tlb, mm); + next = vma_next(&vmi); + if (new_end > old_start) { + /* + * when the old and new regions overlap clear from new_end. + */ + free_pgd_range(&tlb, new_end, old_end, new_end, + next ? next->vm_start : USER_PGTABLES_CEILING); + } else { + /* + * otherwise, clean from old_start; this is done to not touch + * the address space in [new_end, old_start) some architectures + * have constraints on va-space that make this illegal (IA64) - + * for the others its just a little faster. + */ + free_pgd_range(&tlb, old_start, old_end, new_end, + next ? 
next->vm_start : USER_PGTABLES_CEILING); + } + tlb_finish_mmu(&tlb); + + vma_prev(&vmi); + /* Shrink the vma to just the new range */ + return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); +} _ Patches currently in -mm which might be from lorenzo.stoakes@xxxxxxxxxx are tools-improve-vma-test-makefile.patch tools-add-vma-merge-tests.patch mm-introduce-vma_merge_struct-and-abstract-vma_mergevma_modify.patch mm-remove-duplicated-open-coded-vma-policy-check.patch mm-abstract-vma_expand-to-use-vma_merge_struct.patch mm-avoid-using-vma_merge-for-new-vmas.patch mm-make-vma_prepare-and-friends-static-and-internal-to-vmac.patch mm-introduce-commit_merge-abstracting-final-commit-of-merge.patch mm-refactor-vma_merge-into-modify-only-vma_merge_existing_range.patch mm-rework-vm_ops-close-handling-on-vma-merge.patch