[patch 10/14] fs: icache remove inode_lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Remove the global inode_lock; it has been made redundant by the
previous lock breakup.

Signed-off-by: Nick Piggin <npiggin@xxxxxxxxx>

---
 Documentation/filesystems/Locking |    2 
 Documentation/filesystems/porting |   10 +++-
 Documentation/filesystems/vfs.txt |    2 
 fs/buffer.c                       |    2 
 fs/drop_caches.c                  |    4 -
 fs/fs-writeback.c                 |   46 ++++--------------
 fs/inode.c                        |   93 ++++++--------------------------------
 fs/notify/inode_mark.c            |   13 +----
 fs/ntfs/inode.c                   |    4 -
 fs/ocfs2/inode.c                  |    2 
 fs/quota/dquot.c                  |   16 ++----
 include/linux/fs.h                |    2 
 include/linux/writeback.h         |    1 
 mm/backing-dev.c                  |    4 -
 mm/filemap.c                      |    6 +-
 mm/rmap.c                         |    6 +-
 16 files changed, 59 insertions(+), 154 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/fs/buffer.c	2010-10-21 23:50:27.000000000 +1100
@@ -1145,7 +1145,7 @@ __getblk_slow(struct block_device *bdev,
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * and mapping->tree_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/drop_caches.c	2010-10-21 23:50:41.000000000 +1100
@@ -16,7 +16,6 @@ static void drop_pagecache_sb(struct sup
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&sb_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -36,15 +35,12 @@ static void drop_pagecache_sb(struct sup
 		inode_get_ilock(inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&sb_inode_list_lock);
-		spin_unlock(&inode_lock);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_lock);
 		spin_lock(&sb_inode_list_lock);
 	}
 	spin_unlock(&sb_inode_list_lock);
-	spin_unlock(&inode_lock);
 	iput(toput_inode);
 }
 
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c	2010-10-21 23:50:41.000000000 +1100
@@ -194,7 +194,7 @@ static void requeue_io(struct inode *ino
 static void inode_sync_complete(struct inode *inode)
 {
 	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 * Prevent speculative execution through spin_unlock(&inode->i_lock);
 	 */
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_SYNC);
@@ -294,18 +294,16 @@ static void inode_wait_for_writeback(str
 	while (inode->i_state & I_SYNC) {
 		spin_unlock(&wb_inode_list_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		spin_lock(&wb_inode_list_lock);
 	}
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via inode_get or via syscall against an
- * fd) or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages.  Called under wb_inode_list_lock.  Either
+ * the caller has ref on the inode (either via inode_get or via syscall against
+ * an fd) or the inode has I_WILL_FREE set (via generic_forget_inode)
  *
  * If `wait' is set, wait on the writeout.
  *
@@ -313,7 +311,8 @@ static void inode_wait_for_writeback(str
  * starvation of particular inodes when others are being redirtied, prevent
  * livelocks, etc.
  *
- * Called under inode_lock.
+ * Called under wb_inode_list_lock and i_lock. May drop the locks but returns
+ * with them locked.
  */
 static int
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -354,7 +353,6 @@ writeback_single_inode(struct inode *ino
 	inode->i_state &= ~I_DIRTY_PAGES;
 	spin_unlock(&wb_inode_list_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 
 	ret = do_writepages(mapping, wbc);
 
@@ -374,12 +372,10 @@ writeback_single_inode(struct inode *ino
 	 * due to delalloc, clear dirty metadata flags right before
 	 * write_inode()
 	 */
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	dirty = inode->i_state & I_DIRTY;
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -387,7 +383,6 @@ writeback_single_inode(struct inode *ino
 			ret = err;
 	}
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&wb_inode_list_lock);
 	inode->i_state &= ~I_SYNC;
@@ -538,10 +533,8 @@ static int writeback_sb_inodes(struct su
 		}
 		spin_unlock(&wb_inode_list_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		iput(inode);
 		cond_resched();
-		spin_lock(&inode_lock);
 		spin_lock(&wb_inode_list_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
@@ -561,7 +554,6 @@ void writeback_inodes_wb(struct bdi_writ
 
 	if (!wbc->wb_start)
 		wbc->wb_start = jiffies; /* livelock avoidance */
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&wb_inode_list_lock);
 
@@ -590,7 +582,6 @@ void writeback_inodes_wb(struct bdi_writ
 			break;
 	}
 	spin_unlock(&wb_inode_list_lock);
-	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
 
@@ -599,13 +590,11 @@ static void __writeback_inodes_sb(struct
 {
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
 	spin_lock(&wb_inode_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 	writeback_sb_inodes(sb, wb, wbc, true);
 	spin_unlock(&wb_inode_list_lock);
-	spin_unlock(&inode_lock);
 }
 
 /*
@@ -715,7 +704,6 @@ static long wb_writeback(struct bdi_writ
 		 * become available for writeback. Otherwise
 		 * we'll just busyloop.
 		 */
-		spin_lock(&inode_lock);
 lock_again:
 		spin_lock(&wb_inode_list_lock);
 		if (!list_empty(&wb->b_more_io))  {
@@ -731,7 +719,6 @@ static long wb_writeback(struct bdi_writ
 			spin_unlock(&inode->i_lock);
 		}
 		spin_unlock(&wb_inode_list_lock);
-		spin_unlock(&inode_lock);
 	}
 
 	return wrote;
@@ -994,7 +981,6 @@ void __mark_inode_dirty(struct inode *in
 	if (unlikely(block_dump))
 		block_dump___mark_inode_dirty(inode);
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	if ((inode->i_state & flags) != flags) {
 		const int was_dirty = inode->i_state & I_DIRTY;
@@ -1049,7 +1035,6 @@ void __mark_inode_dirty(struct inode *in
 	}
 out:
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 
 	if (wakeup_bdi)
 		bdi_wakeup_thread_delayed(bdi);
@@ -1083,7 +1068,6 @@ static void wait_sb_inodes(struct super_
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&sb_inode_list_lock);
 
@@ -1114,14 +1098,12 @@ static void wait_sb_inodes(struct super_
 		inode_get_ilock_wblock(inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&sb_inode_list_lock);
-		spin_unlock(&inode_lock);
 		/*
-		 * We hold a reference to 'inode' so it couldn't have
-		 * been removed from s_inodes list while we dropped the
-		 * inode_lock.  We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it
-		 * under inode_lock. So we keep the reference and iput
-		 * it later.
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * sb_inode_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * a spinlock. So we keep the reference and iput it later.
 		 */
 		iput(old_inode);
 		old_inode = inode;
@@ -1130,11 +1112,9 @@ static void wait_sb_inodes(struct super_
 
 		cond_resched();
 
-		spin_lock(&inode_lock);
 		spin_lock(&sb_inode_list_lock);
 	}
 	spin_unlock(&sb_inode_list_lock);
-	spin_unlock(&inode_lock);
 	iput(old_inode);
 }
 
@@ -1237,13 +1217,11 @@ int write_inode_now(struct inode *inode,
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&wb_inode_list_lock);
 	ret = writeback_single_inode(inode, &wbc);
 	spin_unlock(&wb_inode_list_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	if (sync)
 		inode_sync_wait(inode);
 	return ret;
@@ -1265,13 +1243,11 @@ int sync_inode(struct inode *inode, stru
 {
 	int ret;
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&wb_inode_list_lock);
 	ret = writeback_single_inode(inode, wbc);
 	spin_unlock(&wb_inode_list_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/inode.c	2010-10-21 23:50:42.000000000 +1100
@@ -29,8 +29,6 @@
  * Icache locking
  *
  * Usage:
- * inode_lock protects:
- *   everything
  * inode->i_lock protects:
  *   i_count
  *   i_state
@@ -45,12 +43,11 @@
  *   inode_in_use, inode_unused, b_io, b_more_io, b_dirty, i_list
  *
  * Ordering:
- * inode_lock
- *   i_lock
- *     sb_inode_list_lock
- *       wb_inode_list_lock
- *     inode_hash_lock
- *       wb_inode_list_lock
+ * i_lock
+ *   sb_inode_list_lock
+ *     wb_inode_list_lock
+ *   inode_hash_lock
+ *     wb_inode_list_lock
  */
 /*
  * This is needed for the following functions:
@@ -109,7 +106,6 @@ static struct hlist_head *inode_hashtabl
  * NOTE! You also have to own the lock if you change
  * the i_state of an inode while it is in use..
  */
-DEFINE_SPINLOCK(inode_lock);
 DEFINE_SPINLOCK(sb_inode_list_lock);
 DEFINE_SPINLOCK(wb_inode_list_lock);
 static DEFINE_SPINLOCK(inode_hash_lock);
@@ -175,7 +171,7 @@ static struct kmem_cache *inode_cachep _
 static void wake_up_inode(struct inode *inode)
 {
 	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 * Prevent speculative execution through spin_unlock(&inode->i_lock);
 	 */
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
@@ -366,7 +362,6 @@ EXPORT_SYMBOL(__inode_get);
  */
 void inode_get_ilock_wblock(struct inode *inode)
 {
- 	assert_spin_locked(&inode_lock);
  	assert_spin_locked(&inode->i_lock);
 	assert_spin_locked(&wb_inode_list_lock);
 	BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
@@ -381,11 +376,10 @@ void inode_get_ilock_wblock(struct inode
 }
 
 /*
- * inode_lock must be held
+ * i_lock must be held
  */
 void inode_get_ilock(struct inode *inode)
 {
- 	assert_spin_locked(&inode_lock);
  	assert_spin_locked(&inode->i_lock);
 	BUG_ON(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE));
 	inode->i_count++;
@@ -458,7 +452,6 @@ static void dispose_list(struct list_hea
 
 		evict(inode);
 
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		spin_lock(&inode_hash_lock);
 		hlist_del_init(&inode->i_hash);
@@ -467,7 +460,6 @@ static void dispose_list(struct list_hea
 		list_del_init(&inode->i_sb_list);
 		spin_unlock(&sb_inode_list_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 
 		wake_up_inode(inode);
 		destroy_inode(inode);
@@ -563,7 +555,7 @@ static int can_unuse(struct inode *inode
 
 /*
  * Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * a temporary list and then are freed outside the LRU lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed.  We expect the final iput() on that inode to add it to
@@ -582,7 +574,6 @@ static void prune_icache(int nr_to_scan)
 	unsigned long reap = 0;
 
 	down_read(&iprune_sem);
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&wb_inode_list_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
@@ -608,12 +599,10 @@ static void prune_icache(int nr_to_scan)
 			inode_get_ilock_wblock(inode);
 			spin_unlock(&wb_inode_list_lock);
 			spin_unlock(&inode->i_lock);
-			spin_unlock(&inode_lock);
 			if (remove_inode_buffers(inode))
 				reap += invalidate_mapping_pages(&inode->i_data,
 								0, -1);
 			iput(inode);
-			spin_lock(&inode_lock);
 lock_again_2:
 			spin_lock(&wb_inode_list_lock);
 			if (!spin_trylock(&inode->i_lock)) {
@@ -644,7 +633,6 @@ static void prune_icache(int nr_to_scan)
 	else
 		__count_vm_events(PGINODESTEAL, reap);
 	spin_unlock(&wb_inode_list_lock);
-	spin_unlock(&inode_lock);
 
 	dispose_list(&freeable);
 	up_read(&iprune_sem);
@@ -780,9 +768,9 @@ __inode_add_to_lists(struct super_block
  * @inode: inode to mark in use
  *
  * When an inode is allocated it needs to be accounted for, added to the in use
- * list, the owning superblock and the inode hash. This needs to be done under
- * the inode_lock, so export a function to do this rather than the inode lock
- * itself. We calculate the hash list to add to here so it is all internal
+ * list, the owning superblock and the inode hash.
+ *
+ * We calculate the hash list to add to here so it is all internal
  * which requires the caller to have already set up the inode number in the
  * inode to add.
  */
@@ -790,11 +778,9 @@ void inode_add_to_lists(struct super_blo
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	__inode_add_to_lists(sb, head, inode);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 }
 EXPORT_SYMBOL_GPL(inode_add_to_lists);
 
@@ -820,17 +806,13 @@ struct inode *new_inode(struct super_blo
 	static atomic_t last_ino = ATOMIC_INIT(0);
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_lock);
-
 	inode = alloc_inode(sb);
 	if (inode) {
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		inode->i_ino = (unsigned int)atomic_inc_return(&last_ino);
 		inode->i_state = 0;
 		__inode_add_to_lists(sb, NULL, inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 	}
 	return inode;
 }
@@ -889,7 +871,6 @@ static struct inode *get_new_inode(struc
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
 		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, head, test, data);
@@ -903,7 +884,6 @@ static struct inode *get_new_inode(struc
 			spin_unlock(&inode_hash_lock);
 			__inode_add_to_lists(sb, NULL, inode);
 			spin_unlock(&inode->i_lock);
-			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -919,7 +899,6 @@ static struct inode *get_new_inode(struc
 		inode_get_ilock(old);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
-		spin_unlock(&inode_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -928,7 +907,6 @@ static struct inode *get_new_inode(struc
 
 set_failed:
 	spin_unlock(&inode_hash_lock);
-	spin_unlock(&inode_lock);
 	destroy_inode(inode);
 	return NULL;
 }
@@ -946,7 +924,6 @@ static struct inode *get_new_inode_fast(
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
 		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode_fast(sb, head, ino);
@@ -958,7 +935,6 @@ static struct inode *get_new_inode_fast(
 			spin_unlock(&inode_hash_lock);
 			__inode_add_to_lists(sb, NULL, inode);
 			spin_unlock(&inode->i_lock);
-			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -974,7 +950,6 @@ static struct inode *get_new_inode_fast(
 		inode_get_ilock(old);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
-		spin_unlock(&inode_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -1034,7 +1009,6 @@ ino_t iunique(struct super_block *sb, in
 	static unsigned int counter;
 	ino_t res;
 
-	spin_lock(&inode_lock);
 	spin_lock(&unique_lock);
 	do {
 		if (counter <= max_reserved)
@@ -1042,7 +1016,6 @@ ino_t iunique(struct super_block *sb, in
 		res = counter++;
 	} while (!is_ino_hashed(sb, res));
 	spin_unlock(&unique_lock);
-	spin_unlock(&inode_lock);
 
 	return res;
 }
@@ -1052,7 +1025,6 @@ struct inode *igrab(struct inode *inode)
 {
 	struct inode *ret = inode;
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		inode_get_ilock(inode);
@@ -1064,7 +1036,6 @@ struct inode *igrab(struct inode *inode)
 		 */
 		ret = NULL;
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 
 	return ret;
 }
@@ -1087,7 +1058,7 @@ EXPORT_SYMBOL(igrab);
  *
  * Otherwise NULL is returned.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the inode_hash_lock held, so can't sleep.
  */
 static struct inode *ifind(struct super_block *sb,
 		struct hlist_head *head, int (*test)(struct inode *, void *),
@@ -1095,20 +1066,17 @@ static struct inode *ifind(struct super_
 {
 	struct inode *inode;
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode_hash_lock);
 	inode = find_inode(sb, head, test, data);
 	if (inode) {
 		inode_get_ilock(inode);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		if (likely(wait))
 			wait_on_inode(inode);
 		return inode;
 	}
 	spin_unlock(&inode_hash_lock);
-	spin_unlock(&inode_lock);
 	return NULL;
 }
 
@@ -1132,19 +1100,16 @@ static struct inode *ifind_fast(struct s
 {
 	struct inode *inode;
 
-	spin_lock(&inode_lock);
 	spin_lock(&inode_hash_lock);
 	inode = find_inode_fast(sb, head, ino);
 	if (inode) {
 		inode_get_ilock(inode);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		wait_on_inode(inode);
 		return inode;
 	}
 	spin_unlock(&inode_hash_lock);
-	spin_unlock(&inode_lock);
 	return NULL;
 }
 
@@ -1167,7 +1132,7 @@ static struct inode *ifind_fast(struct s
  *
  * Otherwise NULL is returned.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
  */
 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
@@ -1195,7 +1160,7 @@ EXPORT_SYMBOL(ilookup5_nowait);
  *
  * Otherwise NULL is returned.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note, @test is called with the i_lock held, so can't sleep.
  */
 struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
@@ -1246,7 +1211,7 @@ EXPORT_SYMBOL(ilookup);
  * inode and this is returned locked, hashed, and with the I_NEW flag set. The
  * file system gets to fill it in before unlocking it via unlock_new_inode().
  *
- * Note both @test and @set are called with the inode_lock held, so can't sleep.
+ * Note both @test and @set are called with the i_lock held, so can't sleep.
  */
 struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *),
@@ -1307,7 +1272,6 @@ int insert_inode_locked(struct inode *in
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
-		spin_lock(&inode_lock);
 lock_again:
 		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
@@ -1330,14 +1294,12 @@ int insert_inode_locked(struct inode *in
 		hlist_add_head(&inode->i_hash, head);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		return 0;
 
 found_old:
 		inode_get_ilock(old);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
-		spin_unlock(&inode_lock);
 		wait_on_inode(old);
 		if (unlikely(!hlist_unhashed(&old->i_hash))) {
 			iput(old);
@@ -1360,7 +1322,6 @@ int insert_inode_locked4(struct inode *i
 		struct hlist_node *node;
 		struct inode *old = NULL;
 
-		spin_lock(&inode_lock);
 lock_again:
 		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
@@ -1383,14 +1344,12 @@ int insert_inode_locked4(struct inode *i
 		hlist_add_head(&inode->i_hash, head);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		return 0;
 
 found_old:
 		inode_get_ilock(old);
 		spin_unlock(&inode_hash_lock);
 		spin_unlock(&old->i_lock);
-		spin_unlock(&inode_lock);
 		wait_on_inode(old);
 		if (unlikely(!hlist_unhashed(&old->i_hash))) {
 			iput(old);
@@ -1412,13 +1371,11 @@ EXPORT_SYMBOL(insert_inode_locked4);
 void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 {
 	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&inode_hash_lock);
 	hlist_add_head(&inode->i_hash, head);
 	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 }
 EXPORT_SYMBOL(__insert_inode_hash);
 
@@ -1430,13 +1387,11 @@ EXPORT_SYMBOL(__insert_inode_hash);
  */
 void remove_inode_hash(struct inode *inode)
 {
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&inode_hash_lock);
 	hlist_del_init(&inode->i_hash);
 	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
@@ -1487,15 +1442,12 @@ static void iput_final(struct inode *ino
 		atomic_inc(&nr_unused);
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode->i_lock);
-			spin_unlock(&inode_lock);
 			return;
 		}
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_WILL_FREE;
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 		write_inode_now(inode, 1);
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
@@ -1514,15 +1466,12 @@ static void iput_final(struct inode *ino
 	inode->i_state |= I_FREEING;
 	atomic_dec(&nr_inodes);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	evict(inode);
-	spin_lock(&inode_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&inode_hash_lock);
 	hlist_del_init(&inode->i_hash);
 	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	wake_up_inode(inode);
 	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	destroy_inode(inode);
@@ -1542,16 +1491,12 @@ void iput(struct inode *inode)
 	if (inode) {
 		BUG_ON(inode->i_state & I_CLEAR);
 
-		/* open-code atomic_dec_and_lock */
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		inode->i_count--;
-		if (inode->i_count == 0) {
+		if (inode->i_count == 0)
 			iput_final(inode);
-		} else {
+		else
 			spin_unlock(&inode->i_lock);
-			spin_unlock(&inode_lock);
-		}
 	}
 }
 EXPORT_SYMBOL(iput);
@@ -1731,8 +1676,6 @@ EXPORT_SYMBOL(inode_wait);
  * It doesn't matter if I_NEW is not set initially, a call to
  * wake_up_inode() after removing from the hash list will DTRT.
  *
- * This is called with inode_lock held.
- *
  * Called with i_lock held and returns with it dropped.
  */
 static void __wait_on_freeing_inode(struct inode *inode)
@@ -1743,10 +1686,8 @@ static void __wait_on_freeing_inode(stru
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode_hash_lock);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
-	spin_lock(&inode_lock);
 	spin_lock(&inode_hash_lock);
 }
 
Index: linux-2.6/include/linux/writeback.h
===================================================================
--- linux-2.6.orig/include/linux/writeback.h	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/writeback.h	2010-10-21 23:50:40.000000000 +1100
@@ -9,7 +9,6 @@
 
 struct backing_dev_info;
 
-extern spinlock_t inode_lock;
 extern spinlock_t sb_inode_list_lock;
 extern spinlock_t wb_inode_list_lock;
 extern struct list_head inode_in_use;
Index: linux-2.6/fs/quota/dquot.c
===================================================================
--- linux-2.6.orig/fs/quota/dquot.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/quota/dquot.c	2010-10-21 23:50:41.000000000 +1100
@@ -76,7 +76,7 @@
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include <linux/writeback.h>
 
 #include <asm/uaccess.h>
 
@@ -897,7 +897,6 @@ static void add_dquot_ref(struct super_b
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&sb_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -926,21 +925,18 @@ static void add_dquot_ref(struct super_b
 		inode_get_ilock(inode);
 		spin_unlock(&sb_inode_list_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
 
 		iput(old_inode);
 		__dquot_initialize(inode, type);
 		/* We hold a reference to 'inode' so it couldn't have been
-		 * removed from s_inodes list while we dropped the inode_lock.
-		 * We cannot iput the inode now as we can be holding the last
-		 * reference and we cannot iput it under inode_lock. So we
-		 * keep the reference and iput it later. */
+		 * removed from s_inodes list while we dropped the
+		 * sb_inode_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * a spinlock. So we keep the reference and iput it later. */
 		old_inode = inode;
-		spin_lock(&inode_lock);
 		spin_lock(&sb_inode_list_lock);
 	}
 	spin_unlock(&sb_inode_list_lock);
-	spin_unlock(&inode_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1021,7 +1017,6 @@ static void remove_dquot_ref(struct supe
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&sb_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -1044,7 +1039,6 @@ static void remove_dquot_ref(struct supe
 		}
 	}
 	spin_unlock(&sb_inode_list_lock);
-	spin_unlock(&inode_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/fs/notify/inode_mark.c	2010-10-21 23:50:41.000000000 +1100
@@ -22,7 +22,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
+#include <linux/writeback.h>
 
 #include <asm/atomic.h>
 
@@ -232,16 +232,14 @@ int fsnotify_add_inode_mark(struct fsnot
  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * Takes sb_inode_list_lock to protect the super block's list of inodes.
  */
 void fsnotify_unmount_inodes(struct super_block *sb)
 {
 	struct list_head *list = &sb->s_inodes;
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_lock);
 lock_again:
 	spin_lock(&sb_inode_list_lock);
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
@@ -307,13 +305,12 @@ void fsnotify_unmount_inodes(struct supe
 		spin_unlock(&next_i->i_lock);
 
 		/*
-		 * We can safely drop inode_lock here because we hold
+		 * We can safely drop sb_inode_list_lock here because we hold
 		 * references on both inode and next_i.  Also no new inodes
 		 * will be added since the umount has begun.  Finally,
 		 * iprune_mutex keeps shrink_icache_memory() away.
 		 */
 		spin_unlock(&sb_inode_list_lock);
-		spin_unlock(&inode_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
@@ -325,9 +322,7 @@ void fsnotify_unmount_inodes(struct supe
 
 		iput(inode);
 
-		spin_lock(&inode_lock);
 		spin_lock(&sb_inode_list_lock);
 	}
 	spin_unlock(&sb_inode_list_lock);
-	spin_unlock(&inode_lock);
 }
Index: linux-2.6/mm/backing-dev.c
===================================================================
--- linux-2.6.orig/mm/backing-dev.c	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/mm/backing-dev.c	2010-10-21 23:50:39.000000000 +1100
@@ -73,7 +73,6 @@ static int bdi_debug_stats_show(struct s
 	struct inode *inode;
 
 	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
-	spin_lock(&inode_lock);
 	spin_lock(&wb_inode_list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_list)
 		nr_dirty++;
@@ -82,7 +81,6 @@ static int bdi_debug_stats_show(struct s
 	list_for_each_entry(inode, &wb->b_more_io, i_list)
 		nr_more_io++;
 	spin_unlock(&wb_inode_list_lock);
-	spin_unlock(&inode_lock);
 
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
@@ -684,13 +682,11 @@ void bdi_destroy(struct backing_dev_info
 	if (bdi_has_dirty_io(bdi)) {
 		struct bdi_writeback *dst = &default_backing_dev_info.wb;
 
-		spin_lock(&inode_lock);
 		spin_lock(&wb_inode_list_lock);
 		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
 		list_splice(&bdi->wb.b_io, &dst->b_io);
 		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
 		spin_unlock(&wb_inode_list_lock);
-		spin_unlock(&inode_lock);
 	}
 
 	bdi_unregister(bdi);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c	2010-10-21 23:49:57.000000000 +1100
+++ linux-2.6/mm/filemap.c	2010-10-21 23:50:27.000000000 +1100
@@ -80,7 +80,7 @@
  *  ->i_mutex
  *    ->i_alloc_sem             (various)
  *
- *  ->inode_lock
+ *  ->i_lock
  *    ->sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
@@ -98,8 +98,8 @@
  *    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
  *    ->tree_lock		(page_remove_rmap->set_page_dirty)
- *    ->inode_lock		(page_remove_rmap->set_page_dirty)
- *    ->inode_lock		(zap_pte_range->set_page_dirty)
+ *    ->i_lock			(page_remove_rmap->set_page_dirty)
+ *    ->i_lock			(zap_pte_range->set_page_dirty)
  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
  *
  *  ->task->proc_lock
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/mm/rmap.c	2010-10-21 23:50:27.000000000 +1100
@@ -31,11 +31,11 @@
  *             swap_lock (in swap_duplicate, swap_info_get)
  *               mmlist_lock (in mmput, drain_mmlist and others)
  *               mapping->private_lock (in __set_page_dirty_buffers)
- *               inode_lock (in set_page_dirty's __mark_inode_dirty)
- *                 sb_lock (within inode_lock in fs/fs-writeback.c)
+ *               i_lock (in set_page_dirty's __mark_inode_dirty)
+ *                 sb_lock (within i_lock in fs/fs-writeback.c)
  *                 mapping->tree_lock (widely used, in set_page_dirty,
  *                           in arch-dependent flush_dcache_mmap_lock,
- *                           within inode_lock in __sync_single_inode)
+ *                           within i_lock in __sync_single_inode)
  *
  * (code doesn't rely on that order so it could be switched around)
  * ->tasklist_lock
Index: linux-2.6/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.orig/Documentation/filesystems/Locking	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/Documentation/filesystems/Locking	2010-10-21 23:50:27.000000000 +1100
@@ -114,7 +114,7 @@ of the locking scheme for directory oper
 destroy_inode:
 dirty_inode:				(must not sleep)
 write_inode:
-drop_inode:				!!!inode_lock!!!
+drop_inode:				!!!i_lock, sb_inode_list_lock!!!
 evict_inode:
 put_super:		write
 write_super:		read
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/Documentation/filesystems/vfs.txt	2010-10-21 23:50:27.000000000 +1100
@@ -246,7 +246,7 @@ or bottom half).
 	should be synchronous or not, not all filesystems check this flag.
 
   drop_inode: called when the last access to the inode is dropped,
-	with the inode_lock spinlock held.
+	with the i_lock and sb_inode_list_lock spinlocks held.
 
 	This method should be either NULL (normal UNIX filesystem
 	semantics) or "generic_delete_inode" (for filesystems that do not
Index: linux-2.6/fs/ntfs/inode.c
===================================================================
--- linux-2.6.orig/fs/ntfs/inode.c	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/fs/ntfs/inode.c	2010-10-21 23:50:27.000000000 +1100
@@ -54,7 +54,7 @@
  *
  * Return 1 if the attributes match and 0 if not.
  *
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the i_lock spin lock held so it is not
  * allowed to sleep.
  */
 int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, nt
  *
  * Return 0 on success and -errno on error.
  *
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the i_lock spin lock held so it is not
  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
  */
 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
Index: linux-2.6/fs/ocfs2/inode.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/inode.c	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/fs/ocfs2/inode.c	2010-10-21 23:50:27.000000000 +1100
@@ -1195,7 +1195,7 @@ void ocfs2_evict_inode(struct inode *ino
 	ocfs2_clear_inode(inode);
 }
 
-/* Called under inode_lock, with no more references on the
+/* Called under i_lock, with no more references on the
  * struct inode, so it's safe here to check the flags field
  * and to manipulate i_nlink without any other locks. */
 int ocfs2_drop_inode(struct inode *inode)
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-10-21 23:50:27.000000000 +1100
+++ linux-2.6/include/linux/fs.h	2010-10-21 23:50:40.000000000 +1100
@@ -1585,7 +1585,7 @@ struct super_operations {
 };
 
 /*
- * Inode state bits.  Protected by inode_lock.
+ * Inode state bits.  Protected by i_lock.
  *
  * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
  * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
Index: linux-2.6/Documentation/filesystems/porting
===================================================================
--- linux-2.6.orig/Documentation/filesystems/porting	2010-10-21 23:49:52.000000000 +1100
+++ linux-2.6/Documentation/filesystems/porting	2010-10-21 23:50:27.000000000 +1100
@@ -299,7 +299,7 @@ be used instead.  It gets called wheneve
 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
 metadata buffers; getting rid of those is responsibility of method, as it had
 been for ->delete_inode().
-	->drop_inode() returns int now; it's called on final iput() with inode_lock
+	->drop_inode() returns int now; it's called on final iput() with i_lock
 held and it returns true if filesystems wants the inode to be dropped.  As before,
 generic_drop_inode() is still the default and it's been updated appropriately.
 generic_delete_inode() is also alive and it consists simply of return 1.  Note that
@@ -318,3 +318,11 @@ if it's zero is not *and* *never* *had*
 may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
 free the on-disk inode, you may end up doing that while ->write_inode() is writing
 to it.
+
+--
+[mandatory]
+	inode_lock is gone, replaced by fine-grained locks. See fs/inode.c
+for details of which locks to use in place of inode_lock in order to protect
+particular things. Most of the time, a filesystem only needs ->i_lock, which
+protects *all* the inode state and its membership on lists that were
+previously protected by inode_lock.


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux