From: alex chen <alex.chen@xxxxxxxxxx> Subject: ocfs2: ip_alloc_sem should be taken in ocfs2_get_block() ip_alloc_sem should be taken in ocfs2_get_block() when reading file in DIRECT mode to prevent concurrent access to extent tree with ocfs2_dio_end_io_write(), which may cause BUGON in the following situation: read file 'A' end_io of writing file 'A' vfs_read __vfs_read ocfs2_file_read_iter generic_file_read_iter ocfs2_direct_IO __blockdev_direct_IO do_blockdev_direct_IO do_direct_IO get_more_blocks ocfs2_get_block ocfs2_extent_map_get_blocks ocfs2_get_clusters ocfs2_get_clusters_nocache() ocfs2_search_extent_list return the index of record which contains the v_cluster, that is v_cluster > rec[i]->e_cpos. ocfs2_dio_end_io ocfs2_dio_end_io_write down_write(&oi->ip_alloc_sem); ocfs2_mark_extent_written ocfs2_change_extent_flag ocfs2_split_extent ... --> modify the rec[i]->e_cpos, resulting in v_cluster < rec[i]->e_cpos. BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)) [alex.chen@xxxxxxxxxx: v3] Link: http://lkml.kernel.org/r/59EF3614.6050008@xxxxxxxxxx Link: http://lkml.kernel.org/r/59EF3614.6050008@xxxxxxxxxx Fixes: c15471f79506 ("ocfs2: fix sparse file & data ordering issue in direct io") Signed-off-by: Alex Chen <alex.chen@xxxxxxxxxx> Reviewed-by: Jun Piao <piaojun@xxxxxxxxxx> Reviewed-by: Joseph Qi <jiangqi903@xxxxxxxxx> Reviewed-by: Gang He <ghe@xxxxxxxx> Acked-by: Changwei Ge <ge.changwei@xxxxxxx> Cc: Mark Fasheh <mfasheh@xxxxxxxxxxx> Cc: Joel Becker <jlbec@xxxxxxxxxxxx> Cc: Junxiao Bi <junxiao.bi@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/ocfs2/aops.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff -puN fs/ocfs2/aops.c~ocfs2-the-ip_alloc_sem-should-be-taken-in-ocfs2_get_block fs/ocfs2/aops.c --- a/fs/ocfs2/aops.c~ocfs2-the-ip_alloc_sem-should-be-taken-in-ocfs2_get_block +++ a/fs/ocfs2/aops.c @@ -134,6 +134,19 @@ bail: return err; } +static int ocfs2_lock_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + int ret = 0; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + + down_read(&oi->ip_alloc_sem); + ret = ocfs2_get_block(inode, iblock, bh_result, create); + up_read(&oi->ip_alloc_sem); + + return ret; +} + int ocfs2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -2128,7 +2141,7 @@ static void ocfs2_dio_free_write_ctx(str * called like this: dio->get_blocks(dio->inode, fs_startblk, * fs_count, map_bh, dio->rw == WRITE); */ -static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, +static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -2154,12 +2167,9 @@ static int ocfs2_dio_get_block(struct in * while file size will be changed. */ if (pos + total_len <= i_size_read(inode)) { - down_read(&oi->ip_alloc_sem); - /* This is the fast path for re-write. */ - ret = ocfs2_get_block(inode, iblock, bh_result, create); - - up_read(&oi->ip_alloc_sem); + /* This is the fast path for re-write. */ + ret = ocfs2_lock_get_block(inode, iblock, bh_result, create); if (buffer_mapped(bh_result) && !buffer_new(bh_result) && ret == 0) @@ -2424,9 +2434,9 @@ static ssize_t ocfs2_direct_IO(struct ki return 0; if (iov_iter_rw(iter) == READ) - get_block = ocfs2_get_block; + get_block = ocfs2_lock_get_block; else - get_block = ocfs2_dio_get_block; + get_block = ocfs2_dio_wr_get_block; return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block, _ -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html