[PATCH] files: rcu free files_struct

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This makes it safe to deference task->files with just rcu_read_lock
held, removing the need to take task_lock.

Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>
---

I have cleaned up my patch a little more and made this a little
more explicitly rcu.  I took a completley different approach to
documenting the use of rcu_head than we described that seems a little
more robust.

 fs/file.c               | 53 ++++++++++++++++++++++++++---------------
 include/linux/fdtable.h |  1 +
 kernel/fork.c           |  4 ++--
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 412033d8cfdf..88064f185560 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -379,7 +379,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
 	return NULL;
 }
 
-static struct fdtable *close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
 {
 	/*
 	 * It is safe to dereference the fd table without RCU or
@@ -397,8 +397,9 @@ static struct fdtable *close_files(struct files_struct * files)
 		set = fdt->open_fds[j++];
 		while (set) {
 			if (set & 1) {
-				struct file * file = xchg(&fdt->fd[i], NULL);
+				struct file * file = fdt->fd[i];
 				if (file) {
+					rcu_assign_pointer(fdt->fd[i], NULL);
 					filp_close(file, files);
 					cond_resched();
 				}
@@ -407,19 +408,35 @@ static struct fdtable *close_files(struct files_struct * files)
 			set >>= 1;
 		}
 	}
+}
 
-	return fdt;
+static void free_files_struct_rcu(struct rcu_head *rcu)
+{
+	struct files_struct *files =
+		container_of(rcu, struct files_struct, fdtab.rcu);
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+	/* free the arrays if they are not embedded */
+	if (fdt != &files->fdtab)
+		__free_fdtable(fdt);
+	kmem_cache_free(files_cachep, files);
 }
 
 void put_files_struct(struct files_struct *files)
 {
 	if (atomic_dec_and_test(&files->count)) {
-		struct fdtable *fdt = close_files(files);
-
-		/* free the arrays if they are not embedded */
-		if (fdt != &files->fdtab)
-			__free_fdtable(fdt);
-		kmem_cache_free(files_cachep, files);
+		close_files(files);
+		/*
+		 * The rules for using the rcu_head in fdtable:
+		 *
+		 * If the fdtable is being freed independently
+		 * of the files_struct fdtable->rcu is used.
+		 *
+		 * When the fdtable and files_struct are freed
+		 * together the rcu_head from the fdtable embedded in
+		 * files_struct is used.
+		 */
+		call_rcu(&files->fdtab.rcu, free_files_struct_rcu);
 	}
 }
 
@@ -822,12 +839,14 @@ EXPORT_SYMBOL(fget_raw);
 
 struct file *fget_task(struct task_struct *task, unsigned int fd)
 {
+	struct files_struct *files;
 	struct file *file = NULL;
 
-	task_lock(task);
-	if (task->files)
-		file = __fget_files(task->files, fd, 0, 1);
-	task_unlock(task);
+	rcu_read_lock();
+	files = rcu_dereference(task->files);
+	if (files)
+		file = __fget_files(files, fd, 0, 1);
+	rcu_read_unlock();
 
 	return file;
 }
@@ -838,11 +857,9 @@ struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
 	struct files_struct *files;
 	struct file *file = NULL;
 
-	task_lock(task);
-	files = task->files;
+	files = rcu_dereference(task->files);
 	if (files)
 		file = files_lookup_fd_rcu(files, fd);
-	task_unlock(task);
 
 	return file;
 }
@@ -854,8 +871,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
 	unsigned int fd = *ret_fd;
 	struct file *file = NULL;
 
-	task_lock(task);
-	files = task->files;
+	files = rcu_dereference(task->files);
 	if (files) {
 		for (; fd < files_fdtable(files)->max_fds; fd++) {
 			file = files_lookup_fd_rcu(files, fd);
@@ -863,7 +879,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
 				break;
 		}
 	}
-	task_unlock(task);
 	*ret_fd = fd;
 	return file;
 }
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index d0e78174874a..37dcface020f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -30,6 +30,7 @@ struct fdtable {
 	unsigned long *close_on_exec;
 	unsigned long *open_fds;
 	unsigned long *full_fds_bits;
+	/* Used for freeing fdtable and any containing files_struct */
 	struct rcu_head rcu;
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d0ae6f827df..eca474a1586a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2982,7 +2982,7 @@ int ksys_unshare(unsigned long unshare_flags)
 
 		if (new_fd) {
 			fd = current->files;
-			current->files = new_fd;
+			rcu_assign_pointer(current->files, new_fd);
 			new_fd = fd;
 		}
 
@@ -3035,7 +3035,7 @@ int unshare_files(void)
 
 	old = task->files;
 	task_lock(task);
-	task->files = copy;
+	rcu_assign_pointer(task->files, copy);
 	task_unlock(task);
 	put_files_struct(old);
 	return 0;
-- 
2.20.1




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux