[patch 38/52] fs: icache RCU free inodes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



RCU free the struct inode. This will allow:

- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want
  to take i_lock no longer need to take sb_inode_list_lock to walk the list in
  the first place. This will simplify and optimize locking.
- eventually, completely write-free RCU path walking. The inode must be
  consulted for permissions when walking, so a write-free reference (ie.
  RCU is helpful).
- can potentially simplify things a bit in VM land. May not need to take the
  page lock to get back to the page->mapping.
- can remove some nested trylock loops in dcache code
- Could remove the 'wq' allocation from socket now that the entire thing is
  rcu freed.

todo: convert all filesystems
---
 fs/block_dev.c        |    9 ++++++++-
 fs/ext2/super.c       |    9 ++++++++-
 fs/ext3/super.c       |    9 ++++++++-
 fs/fat/inode.c        |    9 ++++++++-
 fs/hugetlbfs/inode.c  |    9 ++++++++-
 fs/inode.c            |    9 ++++++++-
 fs/nfs/inode.c        |    9 ++++++++-
 fs/proc/inode.c       |    9 ++++++++-
 include/linux/fs.h    |    5 ++++-
 ipc/mqueue.c          |    9 ++++++++-
 mm/shmem.c            |    9 ++++++++-
 net/socket.c          |    9 ++++++++-
 net/sunrpc/rpc_pipe.c |   10 +++++++++-
 13 files changed, 101 insertions(+), 13 deletions(-)

Index: linux-2.6/fs/ext2/super.c
===================================================================
--- linux-2.6.orig/fs/ext2/super.c
+++ linux-2.6/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(st
 	return &ei->vfs_inode;
 }
 
-static void ext2_destroy_inode(struct inode *inode)
+static void ext2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
+static void ext2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ext2_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -277,13 +277,20 @@ void __destroy_inode(struct inode *inode
 }
 EXPORT_SYMBOL(__destroy_inode);
 
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(inode_cachep, inode);
+}
+
 void destroy_inode(struct inode *inode)
 {
 	__destroy_inode(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
-		kmem_cache_free(inode_cachep, (inode));
+		call_rcu(&inode->i_rcu, i_callback);
 }
 
 /*
@@ -346,6 +353,7 @@ void clear_inode(struct inode *inode)
 		bd_forget(inode);
 	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
 		cd_forget(inode);
+	/* don't need i_lock here */
 	inode->i_state = I_CLEAR;
 }
 EXPORT_SYMBOL(clear_inode);
@@ -661,7 +669,7 @@ __inode_add_to_lists(struct super_block
 	spin_unlock(&sb_inode_list_lock);
 	if (b) {
 		spin_lock_bucket(b);
-		hlist_bl_add_head(&inode->i_hash, &b->head);
+		hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
 		spin_unlock_bucket(b);
 	}
 }
@@ -713,6 +721,7 @@ struct inode *new_inode(struct super_blo
 
 	inode = alloc_inode(sb);
 	if (inode) {
+		/* XXX: init as locked for speedup */
 		spin_lock(&sb_inode_list_lock);
 		spin_lock(&inode->i_lock);
 		inode->i_ino = atomic_inc_return(&last_ino);
@@ -870,6 +879,7 @@ static int test_inode_iunique(struct sup
 			spin_unlock_bucket(b);
 			return 0;
 		}
+		/* XXX: test for I_FREEING|I_CLEAR|etc? */
 	}
 	spin_unlock_bucket(b);
 	return 1;
@@ -1156,42 +1166,41 @@ int insert_inode_locked(struct inode *in
 	struct super_block *sb = inode->i_sb;
 	ino_t ino = inode->i_ino;
 	struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+	struct hlist_bl_node *node;
+	struct inode *old;
 
 	inode->i_state |= I_NEW;
-	while (1) {
-		struct hlist_bl_node *node;
-		struct inode *old = NULL;
 
 repeat:
-		spin_lock_bucket(b);
-		hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
-			if (old->i_ino != ino)
-				continue;
-			if (old->i_sb != sb)
-				continue;
-			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
-				continue;
-			if (!spin_trylock(&old->i_lock)) {
-				spin_unlock_bucket(b);
-				goto repeat;
-			}
-			break;
-		}
-		if (likely(!node)) {
-			hlist_bl_add_head(&inode->i_hash, &b->head);
+	spin_lock_bucket(b);
+	hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
+		if (old->i_ino != ino)
+			continue;
+		if (old->i_sb != sb)
+			continue;
+		if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			continue;
+		if (!spin_trylock(&old->i_lock)) {
 			spin_unlock_bucket(b);
-			return 0;
-		}
-		spin_unlock_bucket(b);
-		__iget(old);
-		spin_unlock(&old->i_lock);
-		wait_on_inode(old);
-		if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
-			iput(old);
-			return -EBUSY;
+			goto repeat;
 		}
+		goto found_old;
+	}
+	hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
+	spin_unlock_bucket(b);
+	return 0;
+
+found_old:
+	spin_unlock_bucket(b);
+	__iget(old);
+	spin_unlock(&old->i_lock);
+	wait_on_inode(old);
+	if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
 		iput(old);
+		return -EBUSY;
 	}
+	iput(old);
+	goto repeat;
 }
 EXPORT_SYMBOL(insert_inode_locked);
 
@@ -1200,43 +1209,44 @@ int insert_inode_locked4(struct inode *i
 {
 	struct super_block *sb = inode->i_sb;
 	struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
+	struct hlist_bl_node *node;
+	struct inode *old;
 
 	inode->i_state |= I_NEW;
 
-	while (1) {
-		struct hlist_bl_node *node;
-		struct inode *old = NULL;
-
 repeat:
-		spin_lock_bucket(b);
-		hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
-			if (old->i_sb != sb)
-				continue;
-			if (!test(old, data))
-				continue;
-			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
-				continue;
-			if (!spin_trylock(&old->i_lock)) {
-				spin_unlock_bucket(b);
-				goto repeat;
-			}
-			break;
-		}
-		if (likely(!node)) {
-			hlist_bl_add_head(&inode->i_hash, &b->head);
+	spin_lock_bucket(b);
+	hlist_bl_for_each_entry(old, node, &b->head, i_hash) {
+		if (old->i_sb != sb)
+			continue;
+		/* XXX: audit put test outside i_lock? */
+		if (!test(old, data))
+			continue;
+		if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			continue;
+		if (!spin_trylock(&old->i_lock)) {
 			spin_unlock_bucket(b);
-			return 0;
-		}
-		spin_unlock_bucket(b);
-		__iget(old);
-		spin_unlock(&old->i_lock);
-		wait_on_inode(old);
-		if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
-			iput(old);
-			return -EBUSY;
+			cpu_relax();
+			cpu_relax();
+			goto repeat;
 		}
+		goto found_old;
+	}
+	hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
+	spin_unlock_bucket(b);
+	return 0;
+
+found_old:
+	spin_unlock_bucket(b);
+	__iget(old);
+	spin_unlock(&old->i_lock);
+	wait_on_inode(old);
+	if (unlikely(!hlist_bl_unhashed(&old->i_hash))) {
 		iput(old);
+		return -EBUSY;
 	}
+	iput(old);
+	goto repeat;
 }
 EXPORT_SYMBOL(insert_inode_locked4);
 
@@ -1254,7 +1264,7 @@ void __insert_inode_hash(struct inode *i
 
 	spin_lock(&inode->i_lock);
 	spin_lock_bucket(b);
-	hlist_bl_add_head(&inode->i_hash, &b->head);
+	hlist_bl_add_head_rcu(&inode->i_hash, &b->head);
 	spin_unlock_bucket(b);
 	spin_unlock(&inode->i_lock);
 }
@@ -1271,7 +1281,7 @@ void __remove_inode_hash(struct inode *i
 {
 	struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, inode->i_ino);
 	spin_lock_bucket(b);
-	hlist_bl_del_init(&inode->i_hash);
+	hlist_bl_del_init_rcu(&inode->i_hash);
 	spin_unlock_bucket(b);
 }
 
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -726,7 +726,10 @@ struct inode {
 	struct hlist_bl_node	i_hash;
 	struct list_head	i_list;		/* backing dev IO list */
 	struct list_head	i_sb_list;
-	struct list_head	i_dentry;
+	union {
+		struct list_head	i_dentry;
+		struct rcu_head		i_rcu;
+	};
 	unsigned long		i_ino;
 	unsigned int		i_count;
 	unsigned int		i_nlink;
Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c
+++ linux-2.6/fs/block_dev.c
@@ -397,13 +397,20 @@ static struct inode *bdev_alloc_inode(st
 	return &ei->vfs_inode;
 }
 
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct bdev_inode *bdi = BDEV_I(inode);
 
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
+static void bdev_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
Index: linux-2.6/fs/ext3/super.c
===================================================================
--- linux-2.6.orig/fs/ext3/super.c
+++ linux-2.6/fs/ext3/super.c
@@ -485,6 +485,13 @@ static struct inode *ext3_alloc_inode(st
 	return &ei->vfs_inode;
 }
 
+static void ext3_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
 static void ext3_destroy_inode(struct inode *inode)
 {
 	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -495,7 +502,7 @@ static void ext3_destroy_inode(struct in
 				false);
 		dump_stack();
 	}
-	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+	call_rcu(&inode->i_rcu, ext3_i_callback);
 }
 
 static void init_once(void *foo)
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c
+++ linux-2.6/fs/hugetlbfs/inode.c
@@ -665,11 +665,18 @@ static struct inode *hugetlbfs_alloc_ino
 	return &p->vfs_inode;
 }
 
+static void hugetlbfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+}
+
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
 	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
 	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
-	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+	call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
 }
 
 static const struct address_space_operations hugetlbfs_aops = {
Index: linux-2.6/fs/proc/inode.c
===================================================================
--- linux-2.6.orig/fs/proc/inode.c
+++ linux-2.6/fs/proc/inode.c
@@ -66,11 +66,18 @@ static struct inode *proc_alloc_inode(st
 	return inode;
 }
 
-static void proc_destroy_inode(struct inode *inode)
+static void proc_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
+static void proc_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, proc_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
Index: linux-2.6/ipc/mqueue.c
===================================================================
--- linux-2.6.orig/ipc/mqueue.c
+++ linux-2.6/ipc/mqueue.c
@@ -236,11 +236,18 @@ static struct inode *mqueue_alloc_inode(
 	return &ei->vfs_inode;
 }
 
-static void mqueue_destroy_inode(struct inode *inode)
+static void mqueue_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
 }
 
+static void mqueue_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, mqueue_i_callback);
+}
+
 static void mqueue_delete_inode(struct inode *inode)
 {
 	struct mqueue_inode_info *info;
Index: linux-2.6/net/socket.c
===================================================================
--- linux-2.6.orig/net/socket.c
+++ linux-2.6/net/socket.c
@@ -271,20 +271,20 @@ static struct inode *sock_alloc_inode(st
 }
 
 
-static void wq_free_rcu(struct rcu_head *head)
+static void sock_free_rcu(struct rcu_head *head)
 {
-	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct socket_alloc *ei = container_of(inode, struct socket_alloc,
+								vfs_inode);
 
-	kfree(wq);
+	kfree(ei->socket.wq);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void sock_destroy_inode(struct inode *inode)
 {
-	struct socket_alloc *ei;
-
-	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
-	kmem_cache_free(sock_inode_cachep, ei);
+	call_rcu(&inode->i_rcu, sock_free_rcu);
 }
 
 static void init_once(void *foo)
Index: linux-2.6/fs/fat/inode.c
===================================================================
--- linux-2.6.orig/fs/fat/inode.c
+++ linux-2.6/fs/fat/inode.c
@@ -520,11 +520,18 @@ static struct inode *fat_alloc_inode(str
 	return &ei->vfs_inode;
 }
 
-static void fat_destroy_inode(struct inode *inode)
+static void fat_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
+static void fat_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, fat_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
Index: linux-2.6/fs/nfs/inode.c
===================================================================
--- linux-2.6.orig/fs/nfs/inode.c
+++ linux-2.6/fs/nfs/inode.c
@@ -1365,11 +1365,18 @@ struct inode *nfs_alloc_inode(struct sup
 	return &nfsi->vfs_inode;
 }
 
-void nfs_destroy_inode(struct inode *inode)
+static void nfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
 }
 
+void nfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, nfs_i_callback);
+}
+
 static inline void nfs4_init_once(struct nfs_inode *nfsi)
 {
 #ifdef CONFIG_NFS_V4
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c
+++ linux-2.6/mm/shmem.c
@@ -2389,13 +2389,20 @@ static struct inode *shmem_alloc_inode(s
 	return &p->vfs_inode;
 }
 
+static void shmem_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+}
+
 static void shmem_destroy_inode(struct inode *inode)
 {
 	if ((inode->i_mode & S_IFMT) == S_IFREG) {
 		/* only struct inode is valid if it's an inline symlink */
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
 	}
-	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+	call_rcu(&inode->i_rcu, shmem_i_callback);
 }
 
 static void init_once(void *foo)
Index: linux-2.6/net/sunrpc/rpc_pipe.c
===================================================================
--- linux-2.6.orig/net/sunrpc/rpc_pipe.c
+++ linux-2.6/net/sunrpc/rpc_pipe.c
@@ -163,11 +163,19 @@ rpc_alloc_inode(struct super_block *sb)
 }
 
 static void
-rpc_destroy_inode(struct inode *inode)
+rpc_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
 }
 
+static void
+rpc_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, rpc_i_callback);
+}
+
 static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
Index: linux-2.6/include/linux/net.h
===================================================================
--- linux-2.6.orig/include/linux/net.h
+++ linux-2.6/include/linux/net.h
@@ -120,7 +120,6 @@ enum sock_shutdown_cmd {
 struct socket_wq {
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
-	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
 /**


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux