There are multiple places where ext4_mark_inode_dirty() is called holding write lock of EXT4_I(inode)->i_data_sem. However, if ext4_mark_inode_dirty() needs to expand inode size, this will cause deadlock when ext4_xattr_block_set() tries to get read lock of EXT4_I(inode)->i_data_sem. Following was the messages dumped when this problem happened: INFO: task plymouthd:124 blocked for more than 120 seconds. Not tainted 3.16.0-rc5+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. plymouthd D 0000000000000000 0 124 1 0x00000000 ffff8800378c38b0 0000000000000082 ffffffff81285998 ffff8800378c0010 0000000000012b80 0000000000012b80 ffff88011a49e010 ffff88011a55cf50 ffffffff8108e210 ffff88011a49e010 ffff880037d050a0 fffffffeffffffff Call Trace: [<ffffffff81285998>] ? cfq_dispatch_requests+0x48/0x2b0 [<ffffffff8108e210>] ? account_entity_dequeue+0x80/0xa0 [<ffffffff81592ef9>] schedule+0x29/0x70 [<ffffffff815955dd>] rwsem_down_read_failed+0x9d/0xf0 [<ffffffff812978d4>] call_rwsem_down_read_failed+0x14/0x30 [<ffffffff81595054>] ? down_read+0x24/0x30 [<ffffffffa00cefb6>] ext4_xattr_block_set+0x546/0x6a0 [ext4] [<ffffffffa00cfc13>] ext4_expand_extra_isize_ea+0x4b3/0x8b0 [ext4] [<ffffffffa0090b06>] ext4_mark_inode_dirty+0x1a6/0x240 [ext4] [<ffffffffa0091c12>] ext4_setattr+0x452/0x5f0 [ext4] [<ffffffff811b0baa>] notify_change+0x1ca/0x330 [<ffffffff81193a36>] do_truncate+0x66/0xa0 [<ffffffff8124ab16>] ? ima_file_check+0x36/0x50 [<ffffffff811a548d>] do_last+0x6bd/0x8c0 [<ffffffff8119f700>] ? __inode_permission+0x20/0xc0 [<ffffffff811a5754>] path_openat+0xc4/0x480 [<ffffffff811d4fe0>] ? ep_read_events_proc+0xc0/0xc0 [<ffffffff811a5c4a>] do_filp_open+0x4a/0xa0 [<ffffffff811b215d>] ? __alloc_fd+0xcd/0x140 [<ffffffff8119435a>] do_sys_open+0x11a/0x230 [<ffffffff811944ae>] SyS_open+0x1e/0x20 [<ffffffff81596592>] system_call_fastpath+0x16/0x1b Signed-off-by: Li Xi <lixi <at> ddn.com> --- Index: linux.git/fs/ext4/inode.c =================================================================== --- linux.git.orig/fs/ext4/inode.c +++ linux.git/fs/ext4/inode.c @@ -2270,8 +2270,8 @@ static int mpage_map_and_submit_extent(h disksize = i_size; if (disksize > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = disksize; - err2 = ext4_mark_inode_dirty(handle, inode); up_write(&EXT4_I(inode)->i_data_sem); + err2 = ext4_mark_inode_dirty(handle, inode); if (err2) ext4_error(inode->i_sb, "Failed to mark inode %lu dirty", @@ -4650,9 +4650,11 @@ int ext4_setattr(struct dentry *dentry, } down_write(&EXT4_I(inode)->i_data_sem); EXT4_I(inode)->i_disksize = attr->ia_size; + up_write(&EXT4_I(inode)->i_data_sem); rc = ext4_mark_inode_dirty(handle, inode); if (!error) error = rc; + down_write(&EXT4_I(inode)->i_data_sem); /* * We have to update i_size under i_data_sem together * with i_disksize to avoid races with writeback code Index: linux.git/fs/ext4/ioctl.c =================================================================== --- linux.git.orig/fs/ext4/ioctl.c +++ linux.git/fs/ext4/ioctl.c @@ -169,7 +169,9 @@ static long swap_inode_boot_loader(struc ext4_discard_preallocations(inode); + ext4_double_up_write_data_sem(inode, inode_bl); err = ext4_mark_inode_dirty(handle, inode); + ext4_double_down_write_data_sem(inode, inode_bl); if (err < 0) { ext4_warning(inode->i_sb, "couldn't mark inode #%lu dirty (err %d)", @@ -177,14 +179,18 @@ static long swap_inode_boot_loader(struc /* Revert all changes: */ swap_inode_data(inode, inode_bl); } else { + ext4_double_up_write_data_sem(inode, inode_bl); err = ext4_mark_inode_dirty(handle, inode_bl); + ext4_double_down_write_data_sem(inode, inode_bl); if (err < 0) { ext4_warning(inode_bl->i_sb, "couldn't mark inode #%lu dirty (err %d)", inode_bl->i_ino, err); /* Revert all changes: */ swap_inode_data(inode, inode_bl); + ext4_double_up_write_data_sem(inode, inode_bl); ext4_mark_inode_dirty(handle, inode); + ext4_double_down_write_data_sem(inode, inode_bl); } } ext4_journal_stop(handle); Index: linux.git/fs/ext4/migrate.c =================================================================== --- linux.git.orig/fs/ext4/migrate.c +++ linux.git/fs/ext4/migrate.c @@ -635,14 +635,17 @@ int ext4_ind_migrate(struct inode *inode down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); - if (ret) + if (ret) { + up_write(&EXT4_I(inode)->i_data_sem); goto errout; + } eh = ext_inode_hdr(inode); ex = EXT_FIRST_EXTENT(eh); if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS || eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) { ret = -EOPNOTSUPP; + up_write(&EXT4_I(inode)->i_data_sem); goto errout; } if (eh->eh_entries == 0) @@ -652,6 +655,7 @@ int ext4_ind_migrate(struct inode *inode blk = ext4_ext_pblock(ex); if (len > EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; + up_write(&EXT4_I(inode)->i_data_sem); goto errout; } } @@ -660,9 +664,9 @@ int ext4_ind_migrate(struct inode *inode memset(ei->i_data, 0, sizeof(ei->i_data)); for (i=0; i < len; i++) ei->i_data[i] = cpu_to_le32(blk++); + up_write(&EXT4_I(inode)->i_data_sem); ext4_mark_inode_dirty(handle, inode); errout: ext4_journal_stop(handle); - up_write(&EXT4_I(inode)->i_data_sem); return ret; } -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html