[PATCH] ext4: Fix circular locking dependency with fallocate and touch.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To prevent a circular locking dependency when an ext4_create operation
races with ext4_fallocate, acquire and release i_data_sem separately for
each multiblock request (by calling ext4_get_blocks_wrap) instead of
holding it across the whole allocation loop, and hold i_mutex to prevent
writes and truncates for the entire duration of the fallocate operation.


=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.25-rc1 #4
-------------------------------------------------------
touch/2347 is trying to acquire lock:
 (&ei->i_data_sem){----}, at: [<c01cffed>] ext4_get_blocks_wrap+0x21/0xca

but task is already holding lock:
 (jbd2_handle){--..}, at: [<c01ee43c>] jbd2_journal_start+0xce/0xf0

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (jbd2_handle){--..}:
       [<c013b2e3>] __lock_acquire+0x960/0xb13
       [<c01ee43c>] jbd2_journal_start+0xce/0xf0
       [<c013b502>] lock_acquire+0x6c/0x89
       [<c01ee43c>] jbd2_journal_start+0xce/0xf0
       [<c01ee451>] jbd2_journal_start+0xe3/0xf0
       [<c01ee43c>] jbd2_journal_start+0xce/0xf0
       [<c01d66c5>] ext4_journal_start_sb+0x40/0x42
       [<c01dd2bd>] ext4_fallocate+0x156/0x46b
       [<c01527dd>] __do_fault+0x2ea/0x324
       [<c0108c60>] native_sched_clock+0x8d/0x9f
       [<c0108c60>] native_sched_clock+0x8d/0x9f
       [<c016531a>] fget+0x7d/0x9b
       [<c016302a>] sys_fallocate+0xcc/0xf0
       [<c0104992>] sysenter_past_esp+0x5f/0xa5
       [<ffffffff>] 0xffffffff

-> #0 (&ei->i_data_sem){----}:
       [<c013b20a>] __lock_acquire+0x887/0xb13
       [<c013b502>] lock_acquire+0x6c/0x89
       [<c01cffed>] ext4_get_blocks_wrap+0x21/0xca
       [<c043353b>] down_read+0x30/0x6a
       [<c01cffed>] ext4_get_blocks_wrap+0x21/0xca
       [<c01cffed>] ext4_get_blocks_wrap+0x21/0xca
       [<c01d00df>] ext4_getblk+0x49/0x18f
       [<c01dde4d>] __ext4_journal_dirty_metadata+0x19/0x3c
       [<c01ceff7>] ext4_mark_iloc_dirty+0x380/0x3e5
       [<c01d0ed0>] ext4_bread+0x14/0x78
       [<c01d3b49>] ext4_add_entry+0x483/0x775
       [<c01ce4d5>] ext4_new_inode+0xa13/0xa3d
       [<c043418c>] _spin_unlock+0x1d/0x20
       [<c01ee451>] jbd2_journal_start+0xe3/0xf0
       [<c01d43e0>] ext4_add_nondir+0x15/0x42
       [<c01d48a7>] ext4_create+0xab/0xdf
       [<c01d47fc>] ext4_create+0x0/0xdf
       [<c016ad14>] vfs_create+0x67/0xad
       [<c016cfc2>] open_namei+0x15c/0x512
       [<c0162e35>] do_filp_open+0x1f/0x35
       [<c0162c08>] get_unused_fd_flags+0xd4/0xde
       [<c043418c>] _spin_unlock+0x1d/0x20
       [<c0162e8d>] do_sys_open+0x42/0xbc
       [<c0162f33>] sys_open+0x16/0x18
       [<c0104992>] sysenter_past_esp+0x5f/0xa5
       [<ffffffff>] 0xffffffff

other info that might help us debug this:

2 locks held by touch/2347:
 #0:  (&type->i_mutex_dir_key#5){--..}, at: [<c016cf35>] open_namei+0xcf/0x512
 #1:  (jbd2_handle){--..}, at: [<c01ee43c>] jbd2_journal_start+0xce/0xf0

stack backtrace:
Pid: 2347, comm: touch Not tainted 2.6.25-rc1 #4
 [<c01394cd>] print_circular_bug_tail+0x5b/0x66
 [<c013b20a>] __lock_acquire+0x887/0xb13
 [<c013b502>] lock_acquire+0x6c/0x89
 [<c01cffed>] ? ext4_get_blocks_wrap+0x21/0xca
 [<c043353b>] down_read+0x30/0x6a
 [<c01cffed>] ? ext4_get_blocks_wrap+0x21/0xca
 [<c01cffed>] ext4_get_blocks_wrap+0x21/0xca
 [<c01d00df>] ext4_getblk+0x49/0x18f
 [<c01dde4d>] ? __ext4_journal_dirty_metadata+0x19/0x3c
 [<c01ceff7>] ? ext4_mark_iloc_dirty+0x380/0x3e5
 [<c01d0ed0>] ext4_bread+0x14/0x78
 [<c01d3b49>] ext4_add_entry+0x483/0x775
 [<c01ce4d5>] ? ext4_new_inode+0xa13/0xa3d
 [<c043418c>] ? _spin_unlock+0x1d/0x20
 [<c01ee451>] ? jbd2_journal_start+0xe3/0xf0
 [<c01d43e0>] ext4_add_nondir+0x15/0x42
 [<c01d48a7>] ext4_create+0xab/0xdf
 [<c01d47fc>] ? ext4_create+0x0/0xdf
 [<c016ad14>] vfs_create+0x67/0xad
 [<c016cfc2>] open_namei+0x15c/0x512
 [<c0162e35>] do_filp_open+0x1f/0x35
 [<c0162c08>] ? get_unused_fd_flags+0xd4/0xde
 [<c043418c>] ? _spin_unlock+0x1d/0x20
 [<c0162e8d>] do_sys_open+0x42/0xbc
 [<c0162f33>] sys_open+0x16/0x18
 [<c0104992>] sysenter_past_esp+0x5f/0xa5
 =======================

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx>
---
 fs/ext4/extents.c |   10 +++-------
 1 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc7081f..e856f66 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2623,7 +2623,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
 	 */
 	credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
-	down_write((&EXT4_I(inode)->i_data_sem));
+	mutex_lock(&inode->i_mutex);
 retry:
 	while (ret >= 0 && ret < max_blocks) {
 		block = block + ret;
@@ -2634,7 +2634,7 @@ retry:
 			break;
 		}
 
-		ret = ext4_ext_get_blocks(handle, inode, block,
+		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
 					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
 		WARN_ON(ret <= 0);
@@ -2680,7 +2680,6 @@ retry:
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
-	up_write((&EXT4_I(inode)->i_data_sem));
 	/*
 	 * Time to update the file size.
 	 * Update only when preallocation was requested beyond the file size.
@@ -2692,21 +2691,18 @@ retry:
 			 * if no error, we assume preallocation succeeded
 			 * completely
 			 */
-			mutex_lock(&inode->i_mutex);
 			i_size_write(inode, offset + len);
 			EXT4_I(inode)->i_disksize = i_size_read(inode);
-			mutex_unlock(&inode->i_mutex);
 		} else if (ret < 0 && nblocks) {
 			/* Handle partial allocation scenario */
 			loff_t newsize;
 
-			mutex_lock(&inode->i_mutex);
 			newsize  = (nblocks << blkbits) + i_size_read(inode);
 			i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
 			EXT4_I(inode)->i_disksize = i_size_read(inode);
-			mutex_unlock(&inode->i_mutex);
 		}
 	}
 
+	mutex_unlock(&inode->i_mutex);
 	return ret > 0 ? ret2 : ret;
 }
-- 
1.5.4.1.97.g40aab-dirty

-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux