Let's factor the main logic out into replace_mm_exe_file(), such that all mm->exe_file logic is contained in kernel/fork.c. While at it, perform some simple cleanups that are possible now that we're simplifying the individual functions. Acked-by: Christian Brauner <christian.brauner@xxxxxxxxxx> Acked-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> --- include/linux/mm.h | 1 + kernel/fork.c | 44 +++++++++++++++++++++++++++++++++++++++++--- kernel/sys.c | 33 +-------------------------------- 3 files changed, 43 insertions(+), 35 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ca22e6e694a..48c6fa9ab792 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2581,6 +2581,7 @@ extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern struct file *get_task_exe_file(struct task_struct *task); diff --git a/kernel/fork.c b/kernel/fork.c index bc94b2cc5995..eedce5c77041 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1148,9 +1148,7 @@ void mmput_async(struct mm_struct *mm) * * Main users are mmput() and sys_execve(). Callers prevent concurrent * invocations: in mmput() nobody alive left, in execve task is single - * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the - * mm->exe_file, but does so without using set_mm_exe_file() in order - * to avoid the need for any locks. + * threaded. */ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { @@ -1170,6 +1168,46 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) fput(old_exe_file); } +/** + * replace_mm_exe_file - replace a reference to the mm's executable file + * + * This changes mm's executable file (shown as symlink /proc/[pid]/exe), + * dealing with concurrent invocation and without grabbing the mmap lock in + * write mode. + * + * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE). + */ +int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + struct vm_area_struct *vma; + struct file *old_exe_file; + int ret = 0; + + /* Forbid mm->exe_file change if old file still mapped. */ + old_exe_file = get_mm_exe_file(mm); + if (old_exe_file) { + mmap_read_lock(mm); + for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (path_equal(&vma->vm_file->f_path, + &old_exe_file->f_path)) + ret = -EBUSY; + } + mmap_read_unlock(mm); + fput(old_exe_file); + if (ret) + return ret; + } + + /* set the new file, lockless */ + get_file(new_exe_file); + old_exe_file = xchg(&mm->exe_file, new_exe_file); + if (old_exe_file) + fput(old_exe_file); + return 0; +} + /** * get_mm_exe_file - acquire a reference to the mm's executable file * diff --git a/kernel/sys.c b/kernel/sys.c index ef1a78f5d71c..30c12e54585a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1846,7 +1846,6 @@ SYSCALL_DEFINE1(umask, int, mask) static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; - struct file *old_exe, *exe_file; struct inode *inode; int err; @@ -1869,40 +1868,10 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (err) goto exit; - /* - * Forbid mm->exe_file change if old file still mapped. - */ - exe_file = get_mm_exe_file(mm); - err = -EBUSY; - if (exe_file) { - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (!vma->vm_file) - continue; - if (path_equal(&vma->vm_file->f_path, - &exe_file->f_path)) - goto exit_err; - } - - mmap_read_unlock(mm); - fput(exe_file); - } - - err = 0; - /* set the new file, lockless */ - get_file(exe.file); - old_exe = xchg(&mm->exe_file, exe.file); - if (old_exe) - fput(old_exe); + err = replace_mm_exe_file(mm, exe.file); exit: fdput(exe); return err; -exit_err: - mmap_read_unlock(mm); - fput(exe_file); - goto exit; } /* -- 2.31.1