On Thu 14-09-17 13:19:38, Andrew Morton wrote:
> From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Subject: include/linux/sched/mm.h: uninline mmdrop_async(), etc
>
> mmdrop_async() is only used in fork.c.  Move that and its support
> functions into fork.c, uninline it all.

Is this really an improvement? Why do we want to discourage more code
paths from using mmdrop_async()? It sounds like a useful API and it has
been removed only because it lost its only user in the oom code. Now
that we have a user, I would just keep it where it was before (a sketch
of the kind of caller I have in mind is at the end of this mail).

> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> ---
>
>  include/linux/sched/mm.h |   24 -
>  kernel/fork.c            |  455 +++++++++++++++++++------------------
>  2 files changed, 241 insertions(+), 238 deletions(-)
>
> diff -puN include/linux/sched/mm.h~include-linux-sched-mmh-uninline-mmdrop_async-etc include/linux/sched/mm.h
> --- a/include/linux/sched/mm.h~include-linux-sched-mmh-uninline-mmdrop_async-etc
> +++ a/include/linux/sched/mm.h
> @@ -10,7 +10,7 @@
>  /*
>   * Routines for handling mm_structs
>   */
> -extern struct mm_struct * mm_alloc(void);
> +extern struct mm_struct *mm_alloc(void);
>
>  /**
>   * mmgrab() - Pin a &struct mm_struct.
> @@ -34,27 +34,7 @@ static inline void mmgrab(struct mm_stru
>          atomic_inc(&mm->mm_count);
>  }
>
> -/* mmdrop drops the mm and the page tables */
> -extern void __mmdrop(struct mm_struct *);
> -static inline void mmdrop(struct mm_struct *mm)
> -{
> -        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
> -                __mmdrop(mm);
> -}
> -
> -static inline void mmdrop_async_fn(struct work_struct *work)
> -{
> -        struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
> -        __mmdrop(mm);
> -}
> -
> -static inline void mmdrop_async(struct mm_struct *mm)
> -{
> -        if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
> -                INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
> -                schedule_work(&mm->async_put_work);
> -        }
> -}
> +extern void mmdrop(struct mm_struct *mm);
>
>  /**
>   * mmget() - Pin the address space associated with a &struct mm_struct.
> diff -puN kernel/fork.c~include-linux-sched-mmh-uninline-mmdrop_async-etc kernel/fork.c
> --- a/kernel/fork.c~include-linux-sched-mmh-uninline-mmdrop_async-etc
> +++ a/kernel/fork.c
> @@ -77,6 +77,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/fs_struct.h>
>  #include <linux/magic.h>
> +#include <linux/sched/mm.h>
>  #include <linux/perf_event.h>
>  #include <linux/posix-timers.h>
>  #include <linux/user-return-notifier.h>
> @@ -386,6 +387,244 @@ void free_task(struct task_struct *tsk)
>  }
>  EXPORT_SYMBOL(free_task);
>
> +#ifdef CONFIG_MMU
> +static __latent_entropy int dup_mmap(struct mm_struct *mm,
> +                                     struct mm_struct *oldmm)
> +{
> +        struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
> +        struct rb_node **rb_link, *rb_parent;
> +        int retval;
> +        unsigned long charge;
> +        LIST_HEAD(uf);
> +
> +        uprobe_start_dup_mmap();
> +        if (down_write_killable(&oldmm->mmap_sem)) {
> +                retval = -EINTR;
> +                goto fail_uprobe_end;
> +        }
> +        flush_cache_dup_mm(oldmm);
> +        uprobe_dup_mmap(oldmm, mm);
> +        /*
> +         * Not linked in yet - no deadlock potential:
> +         */
> +        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
> +
> +        /* No ordering required: file already has been exposed. */
> +        RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
> +
> +        mm->total_vm = oldmm->total_vm;
> +        mm->data_vm = oldmm->data_vm;
> +        mm->exec_vm = oldmm->exec_vm;
> +        mm->stack_vm = oldmm->stack_vm;
> +
> +        rb_link = &mm->mm_rb.rb_node;
> +        rb_parent = NULL;
> +        pprev = &mm->mmap;
> +        retval = ksm_fork(mm, oldmm);
> +        if (retval)
> +                goto out;
> +        retval = khugepaged_fork(mm, oldmm);
> +        if (retval)
> +                goto out;
> +
> +        prev = NULL;
> +        for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
> +                struct file *file;
> +
> +                if (mpnt->vm_flags & VM_DONTCOPY) {
> +                        vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
> +                        continue;
> +                }
> +                charge = 0;
> +                if (mpnt->vm_flags & VM_ACCOUNT) {
> +                        unsigned long len = vma_pages(mpnt);
> +
> +                        if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
> +                                goto fail_nomem;
> +                        charge = len;
> +                }
> +                tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> +                if (!tmp)
> +                        goto fail_nomem;
> +                *tmp = *mpnt;
> +                INIT_LIST_HEAD(&tmp->anon_vma_chain);
> +                retval = vma_dup_policy(mpnt, tmp);
> +                if (retval)
> +                        goto fail_nomem_policy;
> +                tmp->vm_mm = mm;
> +                retval = dup_userfaultfd(tmp, &uf);
> +                if (retval)
> +                        goto fail_nomem_anon_vma_fork;
> +                if (tmp->vm_flags & VM_WIPEONFORK) {
> +                        /* VM_WIPEONFORK gets a clean slate in the child. */
> +                        tmp->anon_vma = NULL;
> +                        if (anon_vma_prepare(tmp))
> +                                goto fail_nomem_anon_vma_fork;
> +                } else if (anon_vma_fork(tmp, mpnt))
> +                        goto fail_nomem_anon_vma_fork;
> +                tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
> +                tmp->vm_next = tmp->vm_prev = NULL;
> +                file = tmp->vm_file;
> +                if (file) {
> +                        struct inode *inode = file_inode(file);
> +                        struct address_space *mapping = file->f_mapping;
> +
> +                        get_file(file);
> +                        if (tmp->vm_flags & VM_DENYWRITE)
> +                                atomic_dec(&inode->i_writecount);
> +                        i_mmap_lock_write(mapping);
> +                        if (tmp->vm_flags & VM_SHARED)
> +                                atomic_inc(&mapping->i_mmap_writable);
> +                        flush_dcache_mmap_lock(mapping);
> +                        /* insert tmp into the share list, just after mpnt */
> +                        vma_interval_tree_insert_after(tmp, mpnt,
> +                                        &mapping->i_mmap);
> +                        flush_dcache_mmap_unlock(mapping);
> +                        i_mmap_unlock_write(mapping);
> +                }
> +
> +                /*
> +                 * Clear hugetlb-related page reserves for children. This only
> +                 * affects MAP_PRIVATE mappings. Faults generated by the child
> +                 * are not guaranteed to succeed, even if read-only
> +                 */
> +                if (is_vm_hugetlb_page(tmp))
> +                        reset_vma_resv_huge_pages(tmp);
> +
> +                /*
> +                 * Link in the new vma and copy the page table entries.
> +                 */
> +                *pprev = tmp;
> +                pprev = &tmp->vm_next;
> +                tmp->vm_prev = prev;
> +                prev = tmp;
> +
> +                __vma_link_rb(mm, tmp, rb_link, rb_parent);
> +                rb_link = &tmp->vm_rb.rb_right;
> +                rb_parent = &tmp->vm_rb;
> +
> +                mm->map_count++;
> +                if (!(tmp->vm_flags & VM_WIPEONFORK))
> +                        retval = copy_page_range(mm, oldmm, mpnt);
> +
> +                if (tmp->vm_ops && tmp->vm_ops->open)
> +                        tmp->vm_ops->open(tmp);
> +
> +                if (retval)
> +                        goto out;
> +        }
> +        /* a new mm has just been created */
> +        arch_dup_mmap(oldmm, mm);
> +        retval = 0;
> +out:
> +        up_write(&mm->mmap_sem);
> +        flush_tlb_mm(oldmm);
> +        up_write(&oldmm->mmap_sem);
> +        dup_userfaultfd_complete(&uf);
> +fail_uprobe_end:
> +        uprobe_end_dup_mmap();
> +        return retval;
> +fail_nomem_anon_vma_fork:
> +        mpol_put(vma_policy(tmp));
> +fail_nomem_policy:
> +        kmem_cache_free(vm_area_cachep, tmp);
> +fail_nomem:
> +        retval = -ENOMEM;
> +        vm_unacct_memory(charge);
> +        goto out;
> +}
> +
> +static inline int mm_alloc_pgd(struct mm_struct *mm)
> +{
> +        mm->pgd = pgd_alloc(mm);
> +        if (unlikely(!mm->pgd))
> +                return -ENOMEM;
> +        return 0;
> +}
> +
> +static inline void mm_free_pgd(struct mm_struct *mm)
> +{
> +        pgd_free(mm, mm->pgd);
> +}
> +#else
> +static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
> +{
> +        down_write(&oldmm->mmap_sem);
> +        RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
> +        up_write(&oldmm->mmap_sem);
> +        return 0;
> +}
> +#define mm_alloc_pgd(mm)        (0)
> +#define mm_free_pgd(mm)
> +#endif /* CONFIG_MMU */
> +
> +static void check_mm(struct mm_struct *mm)
> +{
> +        int i;
> +
> +        for (i = 0; i < NR_MM_COUNTERS; i++) {
> +                long x = atomic_long_read(&mm->rss_stat.count[i]);
> +
> +                if (unlikely(x))
> +                        printk(KERN_ALERT "BUG: Bad rss-counter state "
> +                                          "mm:%p idx:%d val:%ld\n", mm, i, x);
> +        }
> +
> +        if (atomic_long_read(&mm->nr_ptes))
> +                pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
> +                                atomic_long_read(&mm->nr_ptes));
> +        if (mm_nr_pmds(mm))
> +                pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
> +                                mm_nr_pmds(mm));
> +
> +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
> +        VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
> +#endif
> +}
> +
> +#define allocate_mm()        (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
> +#define free_mm(mm)          (kmem_cache_free(mm_cachep, (mm)))
> +
> +/*
> + * Called when the last reference to the mm
> + * is dropped: either by a lazy thread or by
> + * mmput. Free the page directory and the mm.
> + */
> +static void __mmdrop(struct mm_struct *mm)
> +{
> +        BUG_ON(mm == &init_mm);
> +        mm_free_pgd(mm);
> +        destroy_context(mm);
> +        hmm_mm_destroy(mm);
> +        mmu_notifier_mm_destroy(mm);
> +        check_mm(mm);
> +        put_user_ns(mm->user_ns);
> +        free_mm(mm);
> +}
> +
> +void mmdrop(struct mm_struct *mm)
> +{
> +        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
> +                __mmdrop(mm);
> +}
> +EXPORT_SYMBOL_GPL(mmdrop);
> +
> +static void mmdrop_async_fn(struct work_struct *work)
> +{
> +        struct mm_struct *mm;
> +
> +        mm = container_of(work, struct mm_struct, async_put_work);
> +        __mmdrop(mm);
> +}
> +
> +static void mmdrop_async(struct mm_struct *mm)
> +{
> +        if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
> +                INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
> +                schedule_work(&mm->async_put_work);
> +        }
> +}
> +
>  static inline void free_signal_struct(struct signal_struct *sig)
>  {
>          taskstats_tgid_free(sig);
> @@ -590,182 +829,8 @@ free_tsk:
>          return NULL;
>  }
>
> -#ifdef CONFIG_MMU
> -static __latent_entropy int dup_mmap(struct mm_struct *mm,
> -                                     struct mm_struct *oldmm)
> -{
> -        struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
> -        struct rb_node **rb_link, *rb_parent;
> -        int retval;
> -        unsigned long charge;
> -        LIST_HEAD(uf);
> -
> -        uprobe_start_dup_mmap();
> -        if (down_write_killable(&oldmm->mmap_sem)) {
> -                retval = -EINTR;
> -                goto fail_uprobe_end;
> -        }
> -        flush_cache_dup_mm(oldmm);
> -        uprobe_dup_mmap(oldmm, mm);
> -        /*
> -         * Not linked in yet - no deadlock potential:
> -         */
> -        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
> -
> -        /* No ordering required: file already has been exposed. */
> -        RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
> -
> -        mm->total_vm = oldmm->total_vm;
> -        mm->data_vm = oldmm->data_vm;
> -        mm->exec_vm = oldmm->exec_vm;
> -        mm->stack_vm = oldmm->stack_vm;
> -
> -        rb_link = &mm->mm_rb.rb_node;
> -        rb_parent = NULL;
> -        pprev = &mm->mmap;
> -        retval = ksm_fork(mm, oldmm);
> -        if (retval)
> -                goto out;
> -        retval = khugepaged_fork(mm, oldmm);
> -        if (retval)
> -                goto out;
> -
> -        prev = NULL;
> -        for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
> -                struct file *file;
> -
> -                if (mpnt->vm_flags & VM_DONTCOPY) {
> -                        vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
> -                        continue;
> -                }
> -                charge = 0;
> -                if (mpnt->vm_flags & VM_ACCOUNT) {
> -                        unsigned long len = vma_pages(mpnt);
> -
> -                        if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
> -                                goto fail_nomem;
> -                        charge = len;
> -                }
> -                tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
> -                if (!tmp)
> -                        goto fail_nomem;
> -                *tmp = *mpnt;
> -                INIT_LIST_HEAD(&tmp->anon_vma_chain);
> -                retval = vma_dup_policy(mpnt, tmp);
> -                if (retval)
> -                        goto fail_nomem_policy;
> -                tmp->vm_mm = mm;
> -                retval = dup_userfaultfd(tmp, &uf);
> -                if (retval)
> -                        goto fail_nomem_anon_vma_fork;
> -                if (tmp->vm_flags & VM_WIPEONFORK) {
> -                        /* VM_WIPEONFORK gets a clean slate in the child. */
> -                        tmp->anon_vma = NULL;
> -                        if (anon_vma_prepare(tmp))
> -                                goto fail_nomem_anon_vma_fork;
> -                } else if (anon_vma_fork(tmp, mpnt))
> -                        goto fail_nomem_anon_vma_fork;
> -                tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
> -                tmp->vm_next = tmp->vm_prev = NULL;
> -                file = tmp->vm_file;
> -                if (file) {
> -                        struct inode *inode = file_inode(file);
> -                        struct address_space *mapping = file->f_mapping;
> -
> -                        get_file(file);
> -                        if (tmp->vm_flags & VM_DENYWRITE)
> -                                atomic_dec(&inode->i_writecount);
> -                        i_mmap_lock_write(mapping);
> -                        if (tmp->vm_flags & VM_SHARED)
> -                                atomic_inc(&mapping->i_mmap_writable);
> -                        flush_dcache_mmap_lock(mapping);
> -                        /* insert tmp into the share list, just after mpnt */
> -                        vma_interval_tree_insert_after(tmp, mpnt,
> -                                        &mapping->i_mmap);
> -                        flush_dcache_mmap_unlock(mapping);
> -                        i_mmap_unlock_write(mapping);
> -                }
> -
> -                /*
> -                 * Clear hugetlb-related page reserves for children. This only
> -                 * affects MAP_PRIVATE mappings. Faults generated by the child
> -                 * are not guaranteed to succeed, even if read-only
> -                 */
> -                if (is_vm_hugetlb_page(tmp))
> -                        reset_vma_resv_huge_pages(tmp);
> -
> -                /*
> -                 * Link in the new vma and copy the page table entries.
> -                 */
> -                *pprev = tmp;
> -                pprev = &tmp->vm_next;
> -                tmp->vm_prev = prev;
> -                prev = tmp;
> -
> -                __vma_link_rb(mm, tmp, rb_link, rb_parent);
> -                rb_link = &tmp->vm_rb.rb_right;
> -                rb_parent = &tmp->vm_rb;
> -
> -                mm->map_count++;
> -                if (!(tmp->vm_flags & VM_WIPEONFORK))
> -                        retval = copy_page_range(mm, oldmm, mpnt);
> -
> -                if (tmp->vm_ops && tmp->vm_ops->open)
> -                        tmp->vm_ops->open(tmp);
> -
> -                if (retval)
> -                        goto out;
> -        }
> -        /* a new mm has just been created */
> -        arch_dup_mmap(oldmm, mm);
> -        retval = 0;
> -out:
> -        up_write(&mm->mmap_sem);
> -        flush_tlb_mm(oldmm);
> -        up_write(&oldmm->mmap_sem);
> -        dup_userfaultfd_complete(&uf);
> -fail_uprobe_end:
> -        uprobe_end_dup_mmap();
> -        return retval;
> -fail_nomem_anon_vma_fork:
> -        mpol_put(vma_policy(tmp));
> -fail_nomem_policy:
> -        kmem_cache_free(vm_area_cachep, tmp);
> -fail_nomem:
> -        retval = -ENOMEM;
> -        vm_unacct_memory(charge);
> -        goto out;
> -}
> -
> -static inline int mm_alloc_pgd(struct mm_struct *mm)
> -{
> -        mm->pgd = pgd_alloc(mm);
> -        if (unlikely(!mm->pgd))
> -                return -ENOMEM;
> -        return 0;
> -}
> -
> -static inline void mm_free_pgd(struct mm_struct *mm)
> -{
> -        pgd_free(mm, mm->pgd);
> -}
> -#else
> -static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
> -{
> -        down_write(&oldmm->mmap_sem);
> -        RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
> -        up_write(&oldmm->mmap_sem);
> -        return 0;
> -}
> -#define mm_alloc_pgd(mm)        (0)
> -#define mm_free_pgd(mm)
> -#endif /* CONFIG_MMU */
> -
>  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
>
> -#define allocate_mm()        (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
> -#define free_mm(mm)          (kmem_cache_free(mm_cachep, (mm)))
> -
>  static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
>
>  static int __init coredump_filter_setup(char *s)
> @@ -856,30 +921,6 @@ fail_nopgd:
>          return NULL;
>  }
>
> -static void check_mm(struct mm_struct *mm)
> -{
> -        int i;
> -
> -        for (i = 0; i < NR_MM_COUNTERS; i++) {
> -                long x = atomic_long_read(&mm->rss_stat.count[i]);
> -
> -                if (unlikely(x))
> -                        printk(KERN_ALERT "BUG: Bad rss-counter state "
> -                                          "mm:%p idx:%d val:%ld\n", mm, i, x);
> -        }
> -
> -        if (atomic_long_read(&mm->nr_ptes))
> -                pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
> -                                atomic_long_read(&mm->nr_ptes));
> -        if (mm_nr_pmds(mm))
> -                pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
> -                                mm_nr_pmds(mm));
> -
> -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
> -        VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
> -#endif
> -}
> -
>  /*
>   * Allocate and initialize an mm_struct.
>   */
> @@ -895,24 +936,6 @@ struct mm_struct *mm_alloc(void)
>          return mm_init(mm, current, current_user_ns());
>  }
>
> -/*
> - * Called when the last reference to the mm
> - * is dropped: either by a lazy thread or by
> - * mmput. Free the page directory and the mm.
> - */
> -void __mmdrop(struct mm_struct *mm)
> -{
> -        BUG_ON(mm == &init_mm);
> -        mm_free_pgd(mm);
> -        destroy_context(mm);
> -        hmm_mm_destroy(mm);
> -        mmu_notifier_mm_destroy(mm);
> -        check_mm(mm);
> -        put_user_ns(mm->user_ns);
> -        free_mm(mm);
> -}
> -EXPORT_SYMBOL_GPL(__mmdrop);
> -
>  static inline void __mmput(struct mm_struct *mm)
>  {
>          VM_BUG_ON(atomic_read(&mm->mm_users));
> _
>
> Patches currently in -mm which might be from akpm@xxxxxxxxxxxxxxxxxxxx are
>
> i-need-old-gcc.patch
> drivers-media-cec-cec-adapc-fix-build-with-gcc-444.patch
> mm-oom_reaper-skip-mm-structs-with-mmu-notifiers-checkpatch-fixes.patch
> arm-arch-arm-include-asm-pageh-needs-personalityh.patch
> ocfs2-old-mle-put-and-release-after-the-function-dlm_add_migration_mle-called-fix.patch
> mm.patch
> include-linux-sched-mmh-uninline-mmdrop_async-etc.patch
> kernel-reboot-add-devm_register_reboot_notifier-fix.patch
> linux-next-git-rejects.patch
> kernel-forkc-export-kernel_thread-to-modules.patch
> slab-leaks3-default-y.patch

--
Michal Hocko
SUSE Labs
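P.S. To make the above concrete, here is a minimal sketch of the kind of
out-of-fork.c caller I have in mind. Note that example_scan_mm() is
hypothetical (it is not from this patch or from the oom reaper series);
it only illustrates the usage pattern that needs mmdrop_async() visible
outside kernel/fork.c:

#include <linux/sched/mm.h>

/*
 * Hypothetical caller, for illustration only.  It pins the mm_struct
 * with mmgrab(), inspects it from a context that has to stay cheap
 * (under a spinlock, say), and then drops its reference.  If that
 * reference turns out to be the last one, mmdrop() would run the full
 * __mmdrop() teardown (page directory, mmu notifiers, ...) right here;
 * mmdrop_async() defers that work to a workqueue instead.
 */
static void example_scan_mm(struct mm_struct *mm)
{
        mmgrab(mm);             /* pin mm_count so the mm_struct stays alive */

        /* ... inspect mm without sleeping ... */

        mmdrop_async(mm);       /* possibly-final put, teardown deferred */
}

With mmdrop_async() made static in kernel/fork.c, such a caller has to
use mmdrop() and pay for the whole teardown in whatever context it
happens to run in, which is why I would rather keep the API in the
header.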