Defer freeing of vma->vm_file when freeing vmas. This is to allow speculative page faults in the mapped file case. Signed-off-by: Michel Lespinasse <michel@xxxxxxxxxxxxxx> --- fs/exec.c | 1 + kernel/fork.c | 17 +++++++++++++++-- mm/mmap.c | 11 +++++++---- mm/nommu.c | 6 ++---- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 18594f11c31f..c9da73eb0f53 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -286,6 +286,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) mmap_write_unlock(mm); err_free: bprm->vma = NULL; + VM_BUG_ON(vma->vm_file); vm_area_free(vma); return err; } diff --git a/kernel/fork.c b/kernel/fork.c index b6078e546114..2f20a5c5fed8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -369,19 +369,31 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) return new; } +static inline void ____vm_area_free(struct vm_area_struct *vma) +{ + if (vma->vm_file) + fput(vma->vm_file); + kmem_cache_free(vm_area_cachep, vma); +} + #ifdef CONFIG_SPECULATIVE_PAGE_FAULT static void __vm_area_free(struct rcu_head *head) { struct vm_area_struct *vma = container_of(head, struct vm_area_struct, vm_rcu); - kmem_cache_free(vm_area_cachep, vma); + ____vm_area_free(vma); } +#endif + void vm_area_free(struct vm_area_struct *vma) { +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT call_rcu(&vma->vm_rcu, __vm_area_free); +#else + ____vm_area_free(vma); +#endif } -#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ static void account_kernel_stack(struct task_struct *tsk, int account) { @@ -621,6 +633,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: + tmp->vm_file = NULL; /* prevents fput within vm_area_free() */ vm_area_free(tmp); fail_nomem: retval = -ENOMEM; diff --git a/mm/mmap.c b/mm/mmap.c index 3f287599a7a3..cc2323e243bb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -178,9 +178,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (vma->vm_file) - fput(vma->vm_file); mpol_put(vma_policy(vma)); + /* fput(vma->vm_file) happens in vm_area_free after an RCU delay. */ vm_area_free(vma); return next; } @@ -949,7 +948,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, if (remove_next) { if (file) { uprobe_munmap(next, next->vm_start, next->vm_end); - fput(file); + /* fput(file) happens whthin vm_area_free(next) */ + VM_BUG_ON(file != next->vm_file); } if (next->anon_vma) anon_vma_merge(vma, next); @@ -1828,7 +1828,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * fput the vma->vm_file here or we would add an extra fput for file * and cause general protection fault ultimately. */ - fput(vma->vm_file); + /* fput happens within vm_area_free */ vm_area_free(vma); vma = merge; /* Update vm_flags to pick up the change. */ @@ -1907,6 +1907,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (vm_flags & VM_DENYWRITE) allow_write_access(file); free_vma: + VM_BUG_ON(vma->vm_file); vm_area_free(vma); unacct_error: if (charged) @@ -2779,6 +2780,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, out_free_mpol: mpol_put(vma_policy(new)); out_free_vma: + new->vm_file = NULL; /* prevents fput within vm_area_free() */ vm_area_free(new); return err; } @@ -3343,6 +3345,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, out_free_mempol: mpol_put(vma_policy(new_vma)); out_free_vma: + new_vma->vm_file = NULL; /* Prevent fput within vm_area_free */ vm_area_free(new_vma); out: return NULL; diff --git a/mm/nommu.c b/mm/nommu.c index 5c9ab799c0e6..06a0dc0b913b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -664,9 +664,8 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) { if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (vma->vm_file) - fput(vma->vm_file); put_nommu_region(vma->vm_region); + /* fput(vma->vm_file) happens within vm_area_free() */ vm_area_free(vma); } @@ -1267,8 +1266,7 @@ unsigned long do_mmap(struct file *file, if (region->vm_file) fput(region->vm_file); kmem_cache_free(vm_region_jar, region); - if (vma->vm_file) - fput(vma->vm_file); + /* fput(vma->vm_file) happens within vm_area_free() */ vm_area_free(vma); return ret; -- 2.20.1