Currently, we have a single refcount variable inside the files_struct. When we go to unshare the files_struct, we check this counter and if it's elevated, then we allocate a new files_struct instead of just repurposing the old one, under the assumption that that indicates that it's shared between tasks. This is not necessarily the case, however. Each task associated with the files_struct does get a long-held reference, but the refcount can be elevated for other reasons as well, by callers of get_files_struct. This means that we can end up allocating a new files_struct if we just happen to race with a call to get_files_struct. Fix this by adding a new counter to track the number associated threads, and use that to determine whether to allocate a new files_struct when unsharing. We protect this new counter with the file_lock instead of doing it with atomics, as a later patch will need to track an "in_exec" flag that will need to be handled in conjunction with the thread counter. Reported-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> --- fs/coredump.c | 2 +- fs/exec.c | 2 +- fs/file.c | 18 ++++++++++++++++-- include/linux/fdtable.h | 2 ++ kernel/fork.c | 16 ++++++++++++---- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 1e2c87acac9b..02374a4febc4 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -751,7 +751,7 @@ void do_coredump(const siginfo_t *siginfo) if (retval) goto close_fail; if (displaced) - put_files_struct(displaced); + release_files_struct(displaced); if (!dump_interrupted()) { file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); diff --git a/fs/exec.c b/fs/exec.c index 1ebf6e5a521d..e71a70d70158 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1832,7 +1832,7 @@ static int __do_execve_file(int fd, struct filename *filename, if (filename) putname(filename); if (displaced) - put_files_struct(displaced); + release_files_struct(displaced); return retval; out: diff --git a/fs/file.c b/fs/file.c index 7ffd6e9d103d..e35eeff4593c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -284,6 +284,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); + newf->thread_count = 1; newf->resize_in_progress = false; init_waitqueue_head(&newf->resize_wait); newf->next_fd = 0; @@ -415,6 +416,8 @@ void put_files_struct(struct files_struct *files) if (atomic_dec_and_test(&files->count)) { struct fdtable *fdt = close_files(files); + WARN_ON_ONCE(files->thread_count); + /* free the arrays if they are not embedded */ if (fdt != &files->fdtab) __free_fdtable(fdt); @@ -422,16 +425,26 @@ void put_files_struct(struct files_struct *files) } } +void release_files_struct(struct files_struct *files) +{ + spin_lock(&files->file_lock); + --files->thread_count; + spin_unlock(&files->file_lock); + put_files_struct(files); +} + void reset_files_struct(struct files_struct *files) { struct task_struct *tsk = current; struct files_struct *old; old = tsk->files; + task_lock(tsk); tsk->files = files; task_unlock(tsk); - put_files_struct(old); + + release_files_struct(old); } void exit_files(struct task_struct *tsk) @@ -442,7 +455,7 @@ void exit_files(struct task_struct *tsk) task_lock(tsk); tsk->files = NULL; task_unlock(tsk); - put_files_struct(files); + release_files_struct(files); } } @@ -457,6 +470,7 @@ struct files_struct init_files = { .full_fds_bits = init_files.full_fds_bits_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), + .thread_count = 1, }; static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 41615f38bcff..2cb02f438201 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -59,6 +59,7 @@ struct files_struct { * written part on a separate cache line in SMP */ spinlock_t file_lock ____cacheline_aligned_in_smp; + int thread_count; unsigned int next_fd; unsigned long close_on_exec_init[1]; unsigned long open_fds_init[1]; @@ -107,6 +108,7 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); +void release_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy; diff --git a/kernel/fork.c b/kernel/fork.c index f0b58479534f..c7eb63e4d5c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1373,6 +1373,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk) if (clone_flags & CLONE_FILES) { atomic_inc(&oldf->count); + spin_lock(&oldf->file_lock); + oldf->thread_count++; + spin_unlock(&oldf->file_lock); goto out; } @@ -2416,10 +2419,15 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) { struct files_struct *fd = current->files; - int error = 0; + int error, count; + + if (!(unshare_flags & CLONE_FILES) || !fd) + return 0; - if ((unshare_flags & CLONE_FILES) && - (fd && atomic_read(&fd->count) > 1)) { + spin_lock(&fd->file_lock); + count = fd->thread_count; + spin_unlock(&fd->file_lock); + if (count > 1) { *new_fdp = dup_fd(fd, &error); if (!*new_fdp) return error; @@ -2542,7 +2550,7 @@ int ksys_unshare(unsigned long unshare_flags) put_cred(new_cred); bad_unshare_cleanup_fd: if (new_fd) - put_files_struct(new_fd); + release_files_struct(new_fd); bad_unshare_cleanup_fs: if (new_fs) -- 2.17.1