[PATCH 03/23] vfs: Generalize the file_list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Eric W. Biederman <ebiederm@xxxxxxxxxxxx>

In the implementation of revoke it is desirable to find all of the
files we want to operation on.  Currently tty's and mark_files_ro
use the file_list for this, and this patch generalizes the file
list so it can be used more efficiently.

This patch starts by introducing struct file_list making the file
list a first class object.  file_list_lock and file_list_unlock
are modified to take this object, making it clear which file_list
we intended to lock.

file_move is transformed into file_list_add taking a file_list and not
allowing the movement of one file to another. __dentry_open
is modified to support this by only adding normal files in open,
special files have always been ignored when walking the file_list.
__dentry_open skipping special files allows __ptmx_open and __tty_open
to safely call file_add as they are adding the file to the file_list
for the first time.

file_kill has been renamed file_list_del to make it clear what it is
doing and to keep from confusing it with a more revoke like operation.

put_filp has been modified to not take file_list_del as we are never
on a file_list when put_filp is called.

fs_may_remount_ro and mark_files_ro have been modified to walk the
inode list to find all of the inodes and then to walk the file list
on those inodes.  It can be a slightly longer walk as we frequently
cache inodes that we do not have open but the overall complexity
should be about the same, these are slow path functions, and it
gives us much greater flexibility overall.

Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxxxxxxxx>
---
 drivers/char/pty.c       |    2 +-
 drivers/char/tty_io.c    |   22 ++++-----
 fs/file_table.c          |  117 +++++++++++++++++++++++++---------------------
 fs/inode.c               |    1 +
 fs/open.c                |    6 ++-
 fs/select.c              |    2 -
 fs/super.c               |    1 -
 include/linux/fs.h       |   24 +++++++--
 include/linux/tty.h      |    2 +-
 security/selinux/hooks.c |    8 ++--
 10 files changed, 102 insertions(+), 83 deletions(-)

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 31038a0..585f700 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -662,7 +662,7 @@ static int __ptmx_open(struct inode *inode, struct file *filp)
 
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+	file_list_add(filp, &tty->tty_files);
 
 	retval = devpts_pty_new(inode, tty->link);
 	if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 66b99a2..b5c0ca1 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -235,11 +235,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
 	struct list_head *p;
 	int count = 0;
 
-	file_list_lock();
-	list_for_each(p, &tty->tty_files) {
+	file_list_lock(&tty->tty_files);
+	list_for_each(p, &tty->tty_files.list) {
 		count++;
 	}
-	file_list_unlock();
+	file_list_unlock(&tty->tty_files);
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_SLAVE &&
 	    tty->link && tty->link->count)
@@ -554,9 +554,9 @@ static void do_tty_hangup(struct work_struct *work)
 	spin_unlock(&redirect_lock);
 
 	check_tty_count(tty, "do_tty_hangup");
-	file_list_lock();
+	file_list_lock(&tty->tty_files);
 	/* This breaks for file handles being sent over AF_UNIX sockets ? */
-	list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+	list_for_each_entry(filp, &tty->tty_files.list, f_u.fu_list) {
 		if (filp->f_op->write == redirected_tty_write)
 			cons_filp = filp;
 		if (filp->f_op->write != tty_write)
@@ -565,7 +565,7 @@ static void do_tty_hangup(struct work_struct *work)
 		tty_fasync(-1, filp, 0);	/* can't block */
 		filp->f_op = &hung_up_tty_fops;
 	}
-	file_list_unlock();
+	file_list_unlock(&tty->tty_files);
 	/*
 	 * FIXME! What are the locking issues here? This may me overdoing
 	 * things... This question is especially important now that we've
@@ -1467,10 +1467,6 @@ static void release_one_tty(struct kref *kref)
 	tty_driver_kref_put(driver);
 	module_put(driver->owner);
 
-	file_list_lock();
-	list_del_init(&tty->tty_files);
-	file_list_unlock();
-
 	free_tty_struct(tty);
 }
 
@@ -1678,7 +1674,7 @@ void tty_release_dev(struct file *filp)
 	 *  - do_tty_hangup no longer sees this file descriptor as
 	 *    something that needs to be handled for hangups.
 	 */
-	file_kill(filp);
+	file_list_del(filp, &tty->tty_files);
 	filp->private_data = NULL;
 
 	/*
@@ -1836,7 +1832,7 @@ got_driver:
 		return PTR_ERR(tty);
 
 	filp->private_data = tty;
-	file_move(filp, &tty->tty_files);
+	file_list_add(filp, &tty->tty_files);
 	check_tty_count(tty, "tty_open");
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -2779,7 +2775,7 @@ void initialize_tty_struct(struct tty_struct *tty,
 	mutex_init(&tty->echo_lock);
 	spin_lock_init(&tty->read_lock);
 	spin_lock_init(&tty->ctrl_lock);
-	INIT_LIST_HEAD(&tty->tty_files);
+	init_file_list(&tty->tty_files);
 	INIT_WORK(&tty->SAK_work, do_SAK_work);
 
 	tty->driver = driver;
diff --git a/fs/file_table.c b/fs/file_table.c
index 334ce39..978f267 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -22,6 +22,7 @@
 #include <linux/fsnotify.h>
 #include <linux/sysctl.h>
 #include <linux/percpu_counter.h>
+#include <linux/writeback.h>
 
 #include <asm/atomic.h>
 
@@ -30,9 +31,6 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
-
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
 
@@ -285,7 +283,8 @@ void __fput(struct file *file)
 		cdev_put(inode->i_cdev);
 	fops_put(file->f_op);
 	put_pid(file->f_owner.pid);
-	file_kill(file);
+	if (!special_file(inode->i_mode))
+		file_list_del(file, &inode->i_files);
 	if (file->f_mode & FMODE_WRITE)
 		drop_file_write_access(file);
 	file->f_path.dentry = NULL;
@@ -352,50 +351,57 @@ void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_kill(file);
 		file_free(file);
 	}
 }
 
-void file_move(struct file *file, struct list_head *list)
+void init_file_list(struct file_list *files)
 {
-	if (!list)
-		return;
-	file_list_lock();
-	list_move(&file->f_u.fu_list, list);
-	file_list_unlock();
+	INIT_LIST_HEAD(&files->list);
+	spin_lock_init(&files->lock);
 }
 
-void file_kill(struct file *file)
+void file_list_add(struct file *file, struct file_list *files)
 {
-	if (!list_empty(&file->f_u.fu_list)) {
-		file_list_lock();
-		list_del_init(&file->f_u.fu_list);
-		file_list_unlock();
-	}
+	file_list_lock(files);
+	list_add(&file->f_u.fu_list, &files->list);
+	file_list_unlock(files);
+}
+EXPORT_SYMBOL(file_list_add);
+
+void file_list_del(struct file *file, struct file_list *files)
+{
+	file_list_lock(files);
+	list_del_init(&file->f_u.fu_list);
+	file_list_unlock(files);
 }
+EXPORT_SYMBOL(file_list_del);
 
 int fs_may_remount_ro(struct super_block *sb)
 {
+	struct inode *inode;
 	struct file *file;
 
 	/* Check that no files are currently opened for writing. */
-	file_list_lock();
-	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
-		struct inode *inode = file->f_path.dentry->d_inode;
-
-		/* File with pending delete? */
-		if (inode->i_nlink == 0)
-			goto too_bad;
-
-		/* Writeable file? */
-		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
-			goto too_bad;
+	spin_lock(&inode_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		file_list_lock(&inode->i_files);
+		list_for_each_entry(file, &inode->i_files.list, f_u.fu_list) {
+			/* File with pending delete? */
+			if (inode->i_nlink == 0)
+				goto too_bad;
+
+			/* Writeable file? */
+			if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
+				goto too_bad;
+		}
+		file_list_unlock(&inode->i_files);
 	}
-	file_list_unlock();
+	spin_unlock(&inode_lock);
 	return 1; /* Tis' cool bro. */
 too_bad:
-	file_list_unlock();
+	file_list_unlock(&inode->i_files);
+	spin_unlock(&inode_lock);
 	return 0;
 }
 
@@ -408,33 +414,38 @@ too_bad:
  */
 void mark_files_ro(struct super_block *sb)
 {
+	struct inode *inode;
 	struct file *f;
 
 retry:
-	file_list_lock();
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
-		struct vfsmount *mnt;
-		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
-		       continue;
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		f->f_mode &= ~FMODE_WRITE;
-		if (file_check_writeable(f) != 0)
-			continue;
-		file_release_write(f);
-		mnt = mntget(f->f_path.mnt);
-		file_list_unlock();
-		/*
-		 * This can sleep, so we can't hold
-		 * the file_list_lock() spinlock.
-		 */
-		mnt_drop_write(mnt);
-		mntput(mnt);
-		goto retry;
+	spin_lock(&inode_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		file_list_lock(&inode->i_files);
+		list_for_each_entry(f, &inode->i_files.list, f_u.fu_list) {
+			struct vfsmount *mnt;
+			if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+				continue;
+			if (!file_count(f))
+				continue;
+			if (!(f->f_mode & FMODE_WRITE))
+				continue;
+			f->f_mode &= ~FMODE_WRITE;
+			if (file_check_writeable(f) != 0)
+				continue;
+			file_release_write(f);
+			mnt = mntget(f->f_path.mnt);
+			file_list_unlock(&inode->i_files);
+			/*
+			 * This can sleep, so we can't hold
+			 * the file_list_lock() spinlock.
+			 */
+			mnt_drop_write(mnt);
+			mntput(mnt);
+			goto retry;
+		}
+		file_list_unlock(&inode->i_files);
 	}
-	file_list_unlock();
+	spin_unlock(&inode_lock);
 }
 
 void __init files_init(unsigned long mempages)
diff --git a/fs/inode.c b/fs/inode.c
index 9d26490..9d52d43 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -251,6 +251,7 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->inotify_watches);
 	mutex_init(&inode->inotify_mutex);
 #endif
+	init_file_list(&inode->i_files);
 }
 EXPORT_SYMBOL(inode_init_once);
 
diff --git a/fs/open.c b/fs/open.c
index 7200e23..20c3fc0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -828,7 +828,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	f->f_path.mnt = mnt;
 	f->f_pos = 0;
 	f->f_op = fops_get(inode->i_fop);
-	file_move(f, &inode->i_sb->s_files);
+	if (!special_file(inode->i_mode))
+		file_list_add(f, &inode->i_files);
 
 	error = security_dentry_open(f, cred);
 	if (error)
@@ -873,7 +874,8 @@ cleanup_all:
 			mnt_drop_write(mnt);
 		}
 	}
-	file_kill(f);
+	if (!special_file(inode->i_mode))
+		file_list_del(f, &inode->i_files);
 	f->f_path.dentry = NULL;
 	f->f_path.mnt = NULL;
 cleanup_file:
diff --git a/fs/select.c b/fs/select.c
index bd30fe8..99e4145 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -942,7 +942,6 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
 }
 #endif /* HAVE_SET_RESTORE_SIGMASK */
 
-#ifdef CONFIG_FILE_HOTPLUG
 static int unpoll_file_once(wait_queue_head_t *q, struct file *file)
 {
 	unsigned long flags;
@@ -971,4 +970,3 @@ void unpoll_file(wait_queue_head_t *q, struct file *file)
 		schedule_timeout_uninterruptible(1);
 }
 EXPORT_SYMBOL(unpoll_file);
-#endif
diff --git a/fs/super.c b/fs/super.c
index 2ea1586..477aeb4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -65,7 +65,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_dirty);
 		INIT_LIST_HEAD(&s->s_io);
 		INIT_LIST_HEAD(&s->s_more_io);
-		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 73242c3..5329fd6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -699,6 +699,11 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
 	return mapping->i_mmap_writable != 0;
 }
 
+struct file_list {
+	spinlock_t		lock;
+	struct list_head	list;
+};
+
 /*
  * Use sequence counter to get consistent i_size on 32-bit processors.
  */
@@ -764,6 +769,7 @@ struct inode {
 	struct list_head	inotify_watches; /* watches on this inode */
 	struct mutex		inotify_mutex;	/* protects the watches list */
 #endif
+	struct file_list	i_files;
 
 	unsigned long		i_state;
 	unsigned long		dirtied_when;	/* jiffies of first dirtying */
@@ -934,9 +940,15 @@ struct file {
 	unsigned long f_mnt_write_state;
 #endif
 };
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);
+
+static inline void file_list_lock(struct file_list *files)
+{
+	spin_lock(&files->lock);
+}
+static inline void file_list_unlock(struct file_list *files)
+{
+	spin_unlock(&files->lock);
+}
 
 #define get_file(x)	atomic_long_inc(&(x)->f_count)
 #define file_count(x)	atomic_long_read(&(x)->f_count)
@@ -1333,7 +1345,6 @@ struct super_block {
 	struct list_head	s_io;		/* parked for writeback */
 	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
-	struct list_head	s_files;
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
 	struct list_head	s_dentry_lru;	/* unused dentry lru */
 	int			s_nr_dentry_unused;	/* # of dentry on lru */
@@ -2163,8 +2174,9 @@ static inline void insert_inode_hash(struct inode *inode) {
 }
 
 extern struct file * get_empty_filp(void);
-extern void file_move(struct file *f, struct list_head *list);
-extern void file_kill(struct file *f);
+extern void init_file_list(struct file_list *files);
+extern void file_list_add(struct file *f, struct file_list *files);
+extern void file_list_del(struct file *f, struct file_list *files);
 #ifdef CONFIG_BLOCK
 struct bio;
 extern void submit_bio(int, struct bio *);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fc39db9..7f04a5e 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -250,7 +250,7 @@ struct tty_struct {
 	struct work_struct hangup_work;
 	void *disc_data;
 	void *driver_data;
-	struct list_head tty_files;
+	struct file_list tty_files;
 
 #define N_TTY_BUF_SIZE 4096
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2fcad7c..65afe36 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2244,8 +2244,8 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 
 	tty = get_current_tty();
 	if (tty) {
-		file_list_lock();
-		if (!list_empty(&tty->tty_files)) {
+		file_list_lock(&tty->tty_files);
+		if (!list_empty(&tty->tty_files.list)) {
 			struct inode *inode;
 
 			/* Revalidate access to controlling tty.
@@ -2253,14 +2253,14 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 			   than using file_has_perm, as this particular open
 			   file may belong to another process and we are only
 			   interested in the inode-based check here. */
-			file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list);
+			file = list_first_entry(&tty->tty_files.list, struct file, f_u.fu_list);
 			inode = file->f_path.dentry->d_inode;
 			if (inode_has_perm(cred, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
 				drop_tty = 1;
 			}
 		}
-		file_list_unlock();
+		file_list_unlock(&tty->tty_files);
 		tty_kref_put(tty);
 	}
 	/* Reset controlling tty. */
-- 
1.6.3.1.54.g99dd.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux