This makes it safe to deference task->files with just rcu_read_lock held, removing the need to take task_lock. Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx> --- I have cleaned up my patch a little more and made this a little more explicitly rcu. I took a completley different approach to documenting the use of rcu_head than we described that seems a little more robust. fs/file.c | 53 ++++++++++++++++++++++++++--------------- include/linux/fdtable.h | 1 + kernel/fork.c | 4 ++-- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/fs/file.c b/fs/file.c index 412033d8cfdf..88064f185560 100644 --- a/fs/file.c +++ b/fs/file.c @@ -379,7 +379,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int return NULL; } -static struct fdtable *close_files(struct files_struct * files) +static void close_files(struct files_struct * files) { /* * It is safe to dereference the fd table without RCU or @@ -397,8 +397,9 @@ static struct fdtable *close_files(struct files_struct * files) set = fdt->open_fds[j++]; while (set) { if (set & 1) { - struct file * file = xchg(&fdt->fd[i], NULL); + struct file * file = fdt->fd[i]; if (file) { + rcu_assign_pointer(fdt->fd[i], NULL); filp_close(file, files); cond_resched(); } @@ -407,19 +408,35 @@ static struct fdtable *close_files(struct files_struct * files) set >>= 1; } } +} - return fdt; +static void free_files_struct_rcu(struct rcu_head *rcu) +{ + struct files_struct *files = + container_of(rcu, struct files_struct, fdtab.rcu); + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + + /* free the arrays if they are not embedded */ + if (fdt != &files->fdtab) + __free_fdtable(fdt); + kmem_cache_free(files_cachep, files); } void put_files_struct(struct files_struct *files) { if (atomic_dec_and_test(&files->count)) { - struct fdtable *fdt = close_files(files); - - /* free the arrays if they are not embedded */ - if (fdt != &files->fdtab) - __free_fdtable(fdt); - kmem_cache_free(files_cachep, files); + close_files(files); + /* + * The rules for using the rcu_head in fdtable: + * + * If the fdtable is being freed independently + * of the files_struct fdtable->rcu is used. + * + * When the fdtable and files_struct are freed + * together the rcu_head from the fdtable embedded in + * files_struct is used. + */ + call_rcu(&files->fdtab.rcu, free_files_struct_rcu); } } @@ -822,12 +839,14 @@ EXPORT_SYMBOL(fget_raw); struct file *fget_task(struct task_struct *task, unsigned int fd) { + struct files_struct *files; struct file *file = NULL; - task_lock(task); - if (task->files) - file = __fget_files(task->files, fd, 0, 1); - task_unlock(task); + rcu_read_lock(); + files = rcu_dereference(task->files); + if (files) + file = __fget_files(files, fd, 0, 1); + rcu_read_unlock(); return file; } @@ -838,11 +857,9 @@ struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd) struct files_struct *files; struct file *file = NULL; - task_lock(task); - files = task->files; + files = rcu_dereference(task->files); if (files) file = files_lookup_fd_rcu(files, fd); - task_unlock(task); return file; } @@ -854,8 +871,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret unsigned int fd = *ret_fd; struct file *file = NULL; - task_lock(task); - files = task->files; + files = rcu_dereference(task->files); if (files) { for (; fd < files_fdtable(files)->max_fds; fd++) { file = files_lookup_fd_rcu(files, fd); @@ -863,7 +879,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret break; } } - task_unlock(task); *ret_fd = fd; return file; } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index d0e78174874a..37dcface020f 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -30,6 +30,7 @@ struct fdtable { unsigned long *close_on_exec; unsigned long *open_fds; unsigned long *full_fds_bits; + /* Used for freeing fdtable and any containing files_struct */ struct rcu_head rcu; }; diff --git a/kernel/fork.c b/kernel/fork.c index 4d0ae6f827df..eca474a1586a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2982,7 +2982,7 @@ int ksys_unshare(unsigned long unshare_flags) if (new_fd) { fd = current->files; - current->files = new_fd; + rcu_assign_pointer(current->files, new_fd); new_fd = fd; } @@ -3035,7 +3035,7 @@ int unshare_files(void) old = task->files; task_lock(task); - task->files = copy; + rcu_assign_pointer(task->files, copy); task_unlock(task); put_files_struct(old); return 0; -- 2.20.1