From: "Liam R. Howlett" <Liam.Howlett@xxxxxxxxxx> Relocation of code for the next commit. There should be no changes here. Link: https://lkml.kernel.org/r/20220504011345.662299-12-Liam.Howlett@xxxxxxxxxx Link: https://lkml.kernel.org/r/20220621204632.3370049-28-Liam.Howlett@xxxxxxxxxx Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: David Howells <dhowells@xxxxxxxxxx> Cc: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx> Cc: SeongJae Park <sj@xxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Cc: Davidlohr Bueso <dave@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/mmap.c | 492 +++++++++++++++++++++++++++--------------------------- 1 file changed, 246 insertions(+), 246 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index c70905ebeec7..582597391899 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1771,252 +1771,6 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; } -unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, - struct list_head *uf) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma = NULL; - struct vm_area_struct *next, *prev, *merge; - pgoff_t pglen = len >> PAGE_SHIFT; - unsigned long charged = 0; - unsigned long end = addr + len; - unsigned long merge_start = addr, merge_end = end; - pgoff_t vm_pgoff; - int error; - MA_STATE(mas, &mm->mm_mt, addr, end - 1); - - /* Check against address space limit. */ - if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { - unsigned long nr_pages; - - /* - * MAP_FIXED may remove pages of mappings that intersects with - * requested mapping. Account for the pages it would unmap. - */ - nr_pages = count_vma_pages_range(mm, addr, end); - - if (!may_expand_vm(mm, vm_flags, - (len >> PAGE_SHIFT) - nr_pages)) - return -ENOMEM; - } - - /* Unmap any existing mapping in the area */ - if (do_munmap(mm, addr, len, uf)) - return -ENOMEM; - - /* - * Private writable mapping: check memory availability - */ - if (accountable_mapping(file, vm_flags)) { - charged = len >> PAGE_SHIFT; - if (security_vm_enough_memory_mm(mm, charged)) - return -ENOMEM; - vm_flags |= VM_ACCOUNT; - } - - next = mas_next(&mas, ULONG_MAX); - prev = mas_prev(&mas, 0); - if (vm_flags & VM_SPECIAL) - goto cannot_expand; - - /* Attempt to expand an old mapping */ - /* Check next */ - if (next && next->vm_start == end && !vma_policy(next) && - can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, - NULL_VM_UFFD_CTX, NULL)) { - merge_end = next->vm_end; - vma = next; - vm_pgoff = next->vm_pgoff - pglen; - } - - /* Check prev */ - if (prev && prev->vm_end == addr && !vma_policy(prev) && - (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, - pgoff, vma->vm_userfaultfd_ctx, NULL) : - can_vma_merge_after(prev, vm_flags, NULL, file, pgoff, - NULL_VM_UFFD_CTX, NULL))) { - merge_start = prev->vm_start; - vma = prev; - vm_pgoff = prev->vm_pgoff; - } - - - /* Actually expand, if possible */ - if (vma && - !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) { - khugepaged_enter_vma(vma, vm_flags); - goto expanded; - } - - mas.index = addr; - mas.last = end - 1; -cannot_expand: - /* - * Determine the object being mapped and call the appropriate - * specific mapper. the address has already been validated, but - * not unmapped, but the maps are removed from the list. - */ - vma = vm_area_alloc(mm); - if (!vma) { - error = -ENOMEM; - goto unacct_error; - } - - vma->vm_start = addr; - vma->vm_end = end; - vma->vm_flags = vm_flags; - vma->vm_page_prot = vm_get_page_prot(vm_flags); - vma->vm_pgoff = pgoff; - - if (file) { - if (vm_flags & VM_SHARED) { - error = mapping_map_writable(file->f_mapping); - if (error) - goto free_vma; - } - - vma->vm_file = get_file(file); - error = call_mmap(file, vma); - if (error) - goto unmap_and_free_vma; - - /* Can addr have changed?? - * - * Answer: Yes, several device drivers can do it in their - * f_op->mmap method. -DaveM - */ - WARN_ON_ONCE(addr != vma->vm_start); - - addr = vma->vm_start; - mas_reset(&mas); - - /* - * If vm_flags changed after call_mmap(), we should try merge - * vma again as we may succeed this time. - */ - if (unlikely(vm_flags != vma->vm_flags && prev)) { - merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, - NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL); - if (merge) { - /* - * ->mmap() can change vma->vm_file and fput - * the original file. So fput the vma->vm_file - * here or we would add an extra fput for file - * and cause general protection fault - * ultimately. - */ - fput(vma->vm_file); - vm_area_free(vma); - vma = merge; - /* Update vm_flags to pick up the change. */ - addr = vma->vm_start; - vm_flags = vma->vm_flags; - goto unmap_writable; - } - } - - vm_flags = vma->vm_flags; - } else if (vm_flags & VM_SHARED) { - error = shmem_zero_setup(vma); - if (error) - goto free_vma; - } else { - vma_set_anonymous(vma); - } - - /* Allow architectures to sanity-check the vm_flags */ - if (!arch_validate_flags(vma->vm_flags)) { - error = -EINVAL; - if (file) - goto unmap_and_free_vma; - else - goto free_vma; - } - - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { - error = -ENOMEM; - if (file) - goto unmap_and_free_vma; - else - goto free_vma; - } - - if (vma->vm_file) - i_mmap_lock_write(vma->vm_file->f_mapping); - - vma_mas_store(vma, &mas); - __vma_link_list(mm, vma, prev); - mm->map_count++; - if (vma->vm_file) { - if (vma->vm_flags & VM_SHARED) - mapping_allow_writable(vma->vm_file->f_mapping); - - flush_dcache_mmap_lock(vma->vm_file->f_mapping); - vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap); - flush_dcache_mmap_unlock(vma->vm_file->f_mapping); - i_mmap_unlock_write(vma->vm_file->f_mapping); - } - - /* - * vma_merge() calls khugepaged_enter_vma() either, the below - * call covers the non-merge case. - */ - khugepaged_enter_vma(vma, vma->vm_flags); - - /* Once vma denies write, undo our temporary denial count */ -unmap_writable: - if (file && vm_flags & VM_SHARED) - mapping_unmap_writable(file->f_mapping); - file = vma->vm_file; -expanded: - perf_event_mmap(vma); - - vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); - if (vm_flags & VM_LOCKED) { - if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || - is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current->mm)) - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; - else - mm->locked_vm += (len >> PAGE_SHIFT); - } - - if (file) - uprobe_mmap(vma); - - /* - * New (or expanded) vma always get soft dirty status. - * Otherwise user-space soft-dirty page tracker won't - * be able to distinguish situation when vma area unmapped, - * then new mapped in-place (which must be aimed as - * a completely new data area). - */ - vma->vm_flags |= VM_SOFTDIRTY; - - vma_set_page_prot(vma); - - validate_mm(mm); - return addr; - -unmap_and_free_vma: - fput(vma->vm_file); - vma->vm_file = NULL; - - /* Undo any partial mapping done by a device driver. */ - unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); - charged = 0; - if (vm_flags & VM_SHARED) - mapping_unmap_writable(file->f_mapping); -free_vma: - vm_area_free(vma); -unacct_error: - if (charged) - vm_unacct_memory(charged); - validate_mm(mm); - return error; -} - /** * unmapped_area() - Find an area between the low_limit and the high_limit with * the correct alignment and offset, all from @info. Note: current->mm is used @@ -2891,6 +2645,252 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, return __do_munmap(mm, start, len, uf, false); } +unsigned long mmap_region(struct file *file, unsigned long addr, + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = NULL; + struct vm_area_struct *next, *prev, *merge; + pgoff_t pglen = len >> PAGE_SHIFT; + unsigned long charged = 0; + unsigned long end = addr + len; + unsigned long merge_start = addr, merge_end = end; + pgoff_t vm_pgoff; + int error; + MA_STATE(mas, &mm->mm_mt, addr, end - 1); + + /* Check against address space limit. */ + if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { + unsigned long nr_pages; + + /* + * MAP_FIXED may remove pages of mappings that intersects with + * requested mapping. Account for the pages it would unmap. + */ + nr_pages = count_vma_pages_range(mm, addr, end); + + if (!may_expand_vm(mm, vm_flags, + (len >> PAGE_SHIFT) - nr_pages)) + return -ENOMEM; + } + + /* Unmap any existing mapping in the area */ + if (do_munmap(mm, addr, len, uf)) + return -ENOMEM; + + /* + * Private writable mapping: check memory availability + */ + if (accountable_mapping(file, vm_flags)) { + charged = len >> PAGE_SHIFT; + if (security_vm_enough_memory_mm(mm, charged)) + return -ENOMEM; + vm_flags |= VM_ACCOUNT; + } + + next = mas_next(&mas, ULONG_MAX); + prev = mas_prev(&mas, 0); + if (vm_flags & VM_SPECIAL) + goto cannot_expand; + + /* Attempt to expand an old mapping */ + /* Check next */ + if (next && next->vm_start == end && !vma_policy(next) && + can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, + NULL_VM_UFFD_CTX, NULL)) { + merge_end = next->vm_end; + vma = next; + vm_pgoff = next->vm_pgoff - pglen; + } + + /* Check prev */ + if (prev && prev->vm_end == addr && !vma_policy(prev) && + (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, + pgoff, vma->vm_userfaultfd_ctx, NULL) : + can_vma_merge_after(prev, vm_flags, NULL, file, pgoff, + NULL_VM_UFFD_CTX, NULL))) { + merge_start = prev->vm_start; + vma = prev; + vm_pgoff = prev->vm_pgoff; + } + + + /* Actually expand, if possible */ + if (vma && + !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) { + khugepaged_enter_vma(vma, vm_flags); + goto expanded; + } + + mas.index = addr; + mas.last = end - 1; +cannot_expand: + /* + * Determine the object being mapped and call the appropriate + * specific mapper. the address has already been validated, but + * not unmapped, but the maps are removed from the list. + */ + vma = vm_area_alloc(mm); + if (!vma) { + error = -ENOMEM; + goto unacct_error; + } + + vma->vm_start = addr; + vma->vm_end = end; + vma->vm_flags = vm_flags; + vma->vm_page_prot = vm_get_page_prot(vm_flags); + vma->vm_pgoff = pgoff; + + if (file) { + if (vm_flags & VM_SHARED) { + error = mapping_map_writable(file->f_mapping); + if (error) + goto free_vma; + } + + vma->vm_file = get_file(file); + error = call_mmap(file, vma); + if (error) + goto unmap_and_free_vma; + + /* Can addr have changed?? + * + * Answer: Yes, several device drivers can do it in their + * f_op->mmap method. -DaveM + */ + WARN_ON_ONCE(addr != vma->vm_start); + + addr = vma->vm_start; + mas_reset(&mas); + + /* + * If vm_flags changed after call_mmap(), we should try merge + * vma again as we may succeed this time. + */ + if (unlikely(vm_flags != vma->vm_flags && prev)) { + merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, + NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL); + if (merge) { + /* + * ->mmap() can change vma->vm_file and fput + * the original file. So fput the vma->vm_file + * here or we would add an extra fput for file + * and cause general protection fault + * ultimately. + */ + fput(vma->vm_file); + vm_area_free(vma); + vma = merge; + /* Update vm_flags to pick up the change. */ + addr = vma->vm_start; + vm_flags = vma->vm_flags; + goto unmap_writable; + } + } + + vm_flags = vma->vm_flags; + } else if (vm_flags & VM_SHARED) { + error = shmem_zero_setup(vma); + if (error) + goto free_vma; + } else { + vma_set_anonymous(vma); + } + + /* Allow architectures to sanity-check the vm_flags */ + if (!arch_validate_flags(vma->vm_flags)) { + error = -EINVAL; + if (file) + goto unmap_and_free_vma; + else + goto free_vma; + } + + if (mas_preallocate(&mas, vma, GFP_KERNEL)) { + error = -ENOMEM; + if (file) + goto unmap_and_free_vma; + else + goto free_vma; + } + + if (vma->vm_file) + i_mmap_lock_write(vma->vm_file->f_mapping); + + vma_mas_store(vma, &mas); + __vma_link_list(mm, vma, prev); + mm->map_count++; + if (vma->vm_file) { + if (vma->vm_flags & VM_SHARED) + mapping_allow_writable(vma->vm_file->f_mapping); + + flush_dcache_mmap_lock(vma->vm_file->f_mapping); + vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap); + flush_dcache_mmap_unlock(vma->vm_file->f_mapping); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } + + /* + * vma_merge() calls khugepaged_enter_vma() either, the below + * call covers the non-merge case. + */ + khugepaged_enter_vma(vma, vma->vm_flags); + + /* Once vma denies write, undo our temporary denial count */ +unmap_writable: + if (file && vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); + file = vma->vm_file; +expanded: + perf_event_mmap(vma); + + vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); + if (vm_flags & VM_LOCKED) { + if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || + is_vm_hugetlb_page(vma) || + vma == get_gate_vma(current->mm)) + vma->vm_flags &= VM_LOCKED_CLEAR_MASK; + else + mm->locked_vm += (len >> PAGE_SHIFT); + } + + if (file) + uprobe_mmap(vma); + + /* + * New (or expanded) vma always get soft dirty status. + * Otherwise user-space soft-dirty page tracker won't + * be able to distinguish situation when vma area unmapped, + * then new mapped in-place (which must be aimed as + * a completely new data area). + */ + vma->vm_flags |= VM_SOFTDIRTY; + + vma_set_page_prot(vma); + + validate_mm(mm); + return addr; + +unmap_and_free_vma: + fput(vma->vm_file); + vma->vm_file = NULL; + + /* Undo any partial mapping done by a device driver. */ + unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); + charged = 0; + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); +free_vma: + vm_area_free(vma); +unacct_error: + if (charged) + vm_unacct_memory(charged); + validate_mm(mm); + return error; +} + static int __vm_munmap(unsigned long start, size_t len, bool downgrade) { int ret; -- 2.35.1