[PATCH] ext4: fix deadlock of i_data_sem in ext4_mark_inode_dirty()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



There are multiple places where ext4_mark_inode_dirty() is called while
holding the write lock of EXT4_I(inode)->i_data_sem. However, if
ext4_mark_inode_dirty() needs to expand the inode size, the task
deadlocks on itself: ext4_xattr_block_set() tries to take the read lock
of EXT4_I(inode)->i_data_sem, which the same task already holds for
write.

The following messages were dumped when this problem occurred:

INFO: task plymouthd:124 blocked for more than 120 seconds.
      Not tainted 3.16.0-rc5+ #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
plymouthd       D 0000000000000000     0   124      1 0x00000000
 ffff8800378c38b0 0000000000000082 ffffffff81285998 ffff8800378c0010
 0000000000012b80 0000000000012b80 ffff88011a49e010 ffff88011a55cf50
 ffffffff8108e210 ffff88011a49e010 ffff880037d050a0 fffffffeffffffff
Call Trace:
 [<ffffffff81285998>] ? cfq_dispatch_requests+0x48/0x2b0
 [<ffffffff8108e210>] ? account_entity_dequeue+0x80/0xa0
 [<ffffffff81592ef9>] schedule+0x29/0x70
 [<ffffffff815955dd>] rwsem_down_read_failed+0x9d/0xf0
 [<ffffffff812978d4>] call_rwsem_down_read_failed+0x14/0x30
 [<ffffffff81595054>] ? down_read+0x24/0x30
 [<ffffffffa00cefb6>] ext4_xattr_block_set+0x546/0x6a0 [ext4]
 [<ffffffffa00cfc13>] ext4_expand_extra_isize_ea+0x4b3/0x8b0 [ext4]
 [<ffffffffa0090b06>] ext4_mark_inode_dirty+0x1a6/0x240 [ext4]
 [<ffffffffa0091c12>] ext4_setattr+0x452/0x5f0 [ext4]
 [<ffffffff811b0baa>] notify_change+0x1ca/0x330
 [<ffffffff81193a36>] do_truncate+0x66/0xa0
 [<ffffffff8124ab16>] ? ima_file_check+0x36/0x50
 [<ffffffff811a548d>] do_last+0x6bd/0x8c0
 [<ffffffff8119f700>] ? __inode_permission+0x20/0xc0
 [<ffffffff811a5754>] path_openat+0xc4/0x480
 [<ffffffff811d4fe0>] ? ep_read_events_proc+0xc0/0xc0
 [<ffffffff811a5c4a>] do_filp_open+0x4a/0xa0
 [<ffffffff811b215d>] ? __alloc_fd+0xcd/0x140
 [<ffffffff8119435a>] do_sys_open+0x11a/0x230
 [<ffffffff811944ae>] SyS_open+0x1e/0x20
 [<ffffffff81596592>] system_call_fastpath+0x16/0x1b

Signed-off-by: Li Xi <lixi <at> ddn.com>
---
Index: linux.git/fs/ext4/inode.c
===================================================================
--- linux.git.orig/fs/ext4/inode.c
+++ linux.git/fs/ext4/inode.c
@@ -2270,8 +2270,8 @@ static int mpage_map_and_submit_extent(h
             disksize = i_size;
         if (disksize > EXT4_I(inode)->i_disksize)
             EXT4_I(inode)->i_disksize = disksize;
-        err2 = ext4_mark_inode_dirty(handle, inode);
         up_write(&EXT4_I(inode)->i_data_sem);
+        err2 = ext4_mark_inode_dirty(handle, inode);
         if (err2)
             ext4_error(inode->i_sb,
                    "Failed to mark inode %lu dirty",
@@ -4650,9 +4650,11 @@ int ext4_setattr(struct dentry *dentry,
             }
             down_write(&EXT4_I(inode)->i_data_sem);
             EXT4_I(inode)->i_disksize = attr->ia_size;
+            up_write(&EXT4_I(inode)->i_data_sem);
             rc = ext4_mark_inode_dirty(handle, inode);
             if (!error)
                 error = rc;
+            down_write(&EXT4_I(inode)->i_data_sem);
             /*
              * We have to update i_size under i_data_sem together
              * with i_disksize to avoid races with writeback code
Index: linux.git/fs/ext4/ioctl.c
===================================================================
--- linux.git.orig/fs/ext4/ioctl.c
+++ linux.git/fs/ext4/ioctl.c
@@ -169,7 +169,9 @@ static long swap_inode_boot_loader(struc

     ext4_discard_preallocations(inode);

+    ext4_double_up_write_data_sem(inode, inode_bl);
     err = ext4_mark_inode_dirty(handle, inode);
+    ext4_double_down_write_data_sem(inode, inode_bl);
     if (err < 0) {
         ext4_warning(inode->i_sb,
             "couldn't mark inode #%lu dirty (err %d)",
@@ -177,14 +179,18 @@ static long swap_inode_boot_loader(struc
         /* Revert all changes: */
         swap_inode_data(inode, inode_bl);
     } else {
+        ext4_double_up_write_data_sem(inode, inode_bl);
         err = ext4_mark_inode_dirty(handle, inode_bl);
+        ext4_double_down_write_data_sem(inode, inode_bl);
         if (err < 0) {
             ext4_warning(inode_bl->i_sb,
                 "couldn't mark inode #%lu dirty (err %d)",
                 inode_bl->i_ino, err);
             /* Revert all changes: */
             swap_inode_data(inode, inode_bl);
+            ext4_double_up_write_data_sem(inode, inode_bl);
             ext4_mark_inode_dirty(handle, inode);
+            ext4_double_down_write_data_sem(inode, inode_bl);
         }
     }
     ext4_journal_stop(handle);
Index: linux.git/fs/ext4/migrate.c
===================================================================
--- linux.git.orig/fs/ext4/migrate.c
+++ linux.git/fs/ext4/migrate.c
@@ -635,14 +635,17 @@ int ext4_ind_migrate(struct inode *inode

     down_write(&EXT4_I(inode)->i_data_sem);
     ret = ext4_ext_check_inode(inode);
-    if (ret)
+    if (ret) {
+        up_write(&EXT4_I(inode)->i_data_sem);
         goto errout;
+    }

     eh = ext_inode_hdr(inode);
     ex  = EXT_FIRST_EXTENT(eh);
     if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS ||
         eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) {
         ret = -EOPNOTSUPP;
+        up_write(&EXT4_I(inode)->i_data_sem);
         goto errout;
     }
     if (eh->eh_entries == 0)
@@ -652,6 +655,7 @@ int ext4_ind_migrate(struct inode *inode
         blk = ext4_ext_pblock(ex);
         if (len > EXT4_NDIR_BLOCKS) {
             ret = -EOPNOTSUPP;
+            up_write(&EXT4_I(inode)->i_data_sem);
             goto errout;
         }
     }
@@ -660,9 +664,9 @@ int ext4_ind_migrate(struct inode *inode
     memset(ei->i_data, 0, sizeof(ei->i_data));
     for (i=0; i < len; i++)
         ei->i_data[i] = cpu_to_le32(blk++);
+    up_write(&EXT4_I(inode)->i_data_sem);
     ext4_mark_inode_dirty(handle, inode);
 errout:
     ext4_journal_stop(handle);
-    up_write(&EXT4_I(inode)->i_data_sem);
     return ret;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux