On Mon, 2011-05-02 at 19:05 -0700, Yongqiang Yang wrote: > v0->v1: > -- ext4_ext_convert_initialized() zeroout whole extent when the extent's > length is less than 14. > > convert and split unwritten are reimplemented based on ext4_split_extent() > added in last patch. > > Signed-off-by: Yongqiang Yang <xiaoqiangnk@xxxxxxxxx> > Tested-by: Allison Henderson <achender@xxxxxxxxxxxxxxxxxx> Nice code reduction done, like the way to handling buffered IO case, doing zero out first, then do the split. Good to me. Reviewed-by: Mingming Cao <cmm@xxxxxxxxxx> > --- > fs/ext4/extents.c | 480 ++++++++--------------------------------------------- > 1 files changed, 72 insertions(+), 408 deletions(-) > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index db1d67c..9e7c7b3 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -2757,17 +2757,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, > struct ext4_map_blocks *map, > struct ext4_ext_path *path) > { > - struct ext4_extent *ex, newex, orig_ex; > - struct ext4_extent *ex1 = NULL; > - struct ext4_extent *ex2 = NULL; > - struct ext4_extent *ex3 = NULL; > - struct ext4_extent_header *eh; > + struct ext4_map_blocks split_map; > + struct ext4_extent zero_ex; > + struct ext4_extent *ex; > ext4_lblk_t ee_block, eof_block; > unsigned int allocated, ee_len, depth; > - ext4_fsblk_t newblock; > int err = 0; > - int ret = 0; > - int may_zeroout; > + int split_flag = 0; > > ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" > "block %llu, max_blocks %u\n", inode->i_ino, > @@ -2779,280 +2775,87 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, > eof_block = map->m_lblk + map->m_len; > > depth = ext_depth(inode); > - eh = path[depth].p_hdr; > ex = path[depth].p_ext; > ee_block = le32_to_cpu(ex->ee_block); > ee_len = ext4_ext_get_actual_len(ex); > allocated = ee_len - (map->m_lblk - ee_block); > - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); > - > - ex2 = ex; > - orig_ex.ee_block = ex->ee_block; > - orig_ex.ee_len = cpu_to_le16(ee_len); > - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); > > + WARN_ON(map->m_lblk < ee_block); > /* > * It is safe to convert extent to initialized via explicit > * zeroout only if extent is fully insde i_size or new_size. > */ > - may_zeroout = ee_block + ee_len <= eof_block; > + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > > - err = ext4_ext_get_access(handle, inode, path + depth); > - if (err) > - goto out; > /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ > - if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { > - err = ext4_ext_zeroout(inode, &orig_ex); > + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && > + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > + err = ext4_ext_zeroout(inode, ex); > if (err) > - goto fix_extent_len; > - /* update the extent length and mark as initialized */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* zeroed the full extent */ > - return allocated; > - } > - > - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ > - if (map->m_lblk > ee_block) { > - ex1 = ex; > - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); > - ext4_ext_mark_uninitialized(ex1); > - ex2 = &newex; > - } > - /* > - * for sanity, update the length of the ex2 extent before > - * we insert ex3, if ex1 is NULL. This is to avoid temporary > - * overlap of blocks. > - */ > - if (!ex1 && allocated > map->m_len) > - ex2->ee_len = cpu_to_le16(map->m_len); > - /* ex3: to ee_block + ee_len : uninitialised */ > - if (allocated > map->m_len) { > - unsigned int newdepth; > - /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ > - if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { > - /* > - * map->m_lblk == ee_block is handled by the zerouout > - * at the beginning. > - * Mark first half uninitialized. > - * Mark second half initialized and zero out the > - * initialized extent > - */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = cpu_to_le16(ee_len - allocated); > - ext4_ext_mark_uninitialized(ex); > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - > - ex3 = &newex; > - ex3->ee_block = cpu_to_le32(map->m_lblk); > - ext4_ext_store_pblock(ex3, newblock); > - ex3->ee_len = cpu_to_le16(allocated); > - err = ext4_ext_insert_extent(handle, inode, path, > - ex3, 0); > - if (err == -ENOSPC) { > - err = ext4_ext_zeroout(inode, &orig_ex); > - if (err) > - goto fix_extent_len; > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, > - ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* blocks available from map->m_lblk */ > - return allocated; > - > - } else if (err) > - goto fix_extent_len; > - > - /* > - * We need to zero out the second half because > - * an fallocate request can update file size and > - * converting the second half to initialized extent > - * implies that we can leak some junk data to user > - * space. > - */ > - err = ext4_ext_zeroout(inode, ex3); > - if (err) { > - /* > - * We should actually mark the > - * second half as uninit and return error > - * Insert would have changed the extent > - */ > - depth = ext_depth(inode); > - ext4_ext_drop_refs(path); > - path = ext4_ext_find_extent(inode, map->m_lblk, > - path); > - if (IS_ERR(path)) { > - err = PTR_ERR(path); > - return err; > - } > - /* get the second half extent details */ > - ex = path[depth].p_ext; > - err = ext4_ext_get_access(handle, inode, > - path + depth); > - if (err) > - return err; > - ext4_ext_mark_uninitialized(ex); > - ext4_ext_dirty(handle, inode, path + depth); > - return err; > - } > - > - /* zeroed the second half */ > - return allocated; > - } > - ex3 = &newex; > - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); > - ext4_ext_store_pblock(ex3, newblock + map->m_len); > - ex3->ee_len = cpu_to_le16(allocated - map->m_len); > - ext4_ext_mark_uninitialized(ex3); > - err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); > - if (err == -ENOSPC && may_zeroout) { > - err = ext4_ext_zeroout(inode, &orig_ex); > - if (err) > - goto fix_extent_len; > - /* update the extent length and mark as initialized */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* zeroed the full extent */ > - /* blocks available from map->m_lblk */ > - return allocated; > - > - } else if (err) > - goto fix_extent_len; > - /* > - * The depth, and hence eh & ex might change > - * as part of the insert above. > - */ > - newdepth = ext_depth(inode); > - /* > - * update the extent length after successful insert of the > - * split extent > - */ > - ee_len -= ext4_ext_get_actual_len(ex3); > - orig_ex.ee_len = cpu_to_le16(ee_len); > - may_zeroout = ee_block + ee_len <= eof_block; > - > - depth = newdepth; > - ext4_ext_drop_refs(path); > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > - if (IS_ERR(path)) { > - err = PTR_ERR(path); > goto out; > - } > - eh = path[depth].p_hdr; > - ex = path[depth].p_ext; > - if (ex2 != &newex) > - ex2 = ex; > > err = ext4_ext_get_access(handle, inode, path + depth); > if (err) > goto out; > - > - allocated = map->m_len; > - > - /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying > - * to insert a extent in the middle zerout directly > - * otherwise give the extent a chance to merge to left > - */ > - if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && > - map->m_lblk != ee_block && may_zeroout) { > - err = ext4_ext_zeroout(inode, &orig_ex); > - if (err) > - goto fix_extent_len; > - /* update the extent length and mark as initialized */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* zero out the first half */ > - /* blocks available from map->m_lblk */ > - return allocated; > - } > - } > - /* > - * If there was a change of depth as part of the > - * insertion of ex3 above, we need to update the length > - * of the ex1 extent again here > - */ > - if (ex1 && ex1 != ex) { > - ex1 = ex; > - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); > - ext4_ext_mark_uninitialized(ex1); > - ex2 = &newex; > - } > - /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ > - ex2->ee_block = cpu_to_le32(map->m_lblk); > - ext4_ext_store_pblock(ex2, newblock); > - ex2->ee_len = cpu_to_le16(allocated); > - if (ex2 != ex) > - goto insert; > - /* > - * New (initialized) extent starts from the first block > - * in the current extent. i.e., ex2 == ex > - * We have to see if it can be merged with the extent > - * on the left. > - */ > - if (ex2 > EXT_FIRST_EXTENT(eh)) { > - /* > - * To merge left, pass "ex2 - 1" to try_to_merge(), > - * since it merges towards right _only_. > - */ > - ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); > - if (ret) { > - err = ext4_ext_correct_indexes(handle, inode, path); > - if (err) > - goto out; > - depth = ext_depth(inode); > - ex2--; > - } > + ext4_ext_mark_initialized(ex); > + ext4_ext_try_to_merge(inode, path, ex); > + err = ext4_ext_dirty(handle, inode, path + depth); > + goto out; > } > + > /* > - * Try to Merge towards right. This might be required > - * only when the whole extent is being written to. > - * i.e. ex2 == ex and ex3 == NULL. > + * four cases: > + * 1. split the extent into three extents. > + * 2. split the extent into two extents, zeroout the first half. > + * 3. split the extent into two extents, zeroout the second half. > + * 4. split the extent into two extents with out zeroout. > */ > - if (!ex3) { > - ret = ext4_ext_try_to_merge(inode, path, ex2); > - if (ret) { > - err = ext4_ext_correct_indexes(handle, inode, path); > + split_map.m_lblk = map->m_lblk; > + split_map.m_len = map->m_len; > + > + if (allocated > map->m_len) { > + if (allocated <= EXT4_EXT_ZERO_LEN && > + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > + /* case 3 */ > + zero_ex.ee_block = > + cpu_to_le32(map->m_lblk + map->m_len); > + zero_ex.ee_len = cpu_to_le16(allocated - map->m_len); > + ext4_ext_store_pblock(&zero_ex, > + ext4_ext_pblock(ex) + map->m_lblk - ee_block); > + err = ext4_ext_zeroout(inode, &zero_ex); > if (err) > goto out; > + split_map.m_lblk = map->m_lblk; > + split_map.m_len = allocated; > + } else if ((map->m_lblk - ee_block + map->m_len < > + EXT4_EXT_ZERO_LEN) && > + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > + /* case 2 */ > + if (map->m_lblk != ee_block) { > + zero_ex.ee_block = ex->ee_block; > + zero_ex.ee_len = cpu_to_le16(map->m_lblk - > + ee_block); > + ext4_ext_store_pblock(&zero_ex, > + ext4_ext_pblock(ex)); > + err = ext4_ext_zeroout(inode, &zero_ex); > + if (err) > + goto out; > + } > + > + allocated = map->m_lblk - ee_block + map->m_len; > + > + split_map.m_lblk = ee_block; > + split_map.m_len = allocated; > } > } > - /* Mark modified extent as dirty */ > - err = ext4_ext_dirty(handle, inode, path + depth); > - goto out; > -insert: > - err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); > - if (err == -ENOSPC && may_zeroout) { > - err = ext4_ext_zeroout(inode, &orig_ex); > - if (err) > - goto fix_extent_len; > - /* update the extent length and mark as initialized */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* zero out the first half */ > - return allocated; > - } else if (err) > - goto fix_extent_len; > + > + allocated = ext4_split_extent(handle, inode, path, > + &split_map, split_flag, 0); > + if (allocated < 0) > + err = allocated; > + > out: > - ext4_ext_show_leaf(inode, path); > return err ? err : allocated; > - > -fix_extent_len: > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_mark_uninitialized(ex); > - ext4_ext_dirty(handle, inode, path + depth); > - return err; > } > > /* > @@ -3083,15 +2886,11 @@ static int ext4_split_unwritten_extents(handle_t *handle, > struct ext4_ext_path *path, > int flags) > { > - struct ext4_extent *ex, newex, orig_ex; > - struct ext4_extent *ex1 = NULL; > - struct ext4_extent *ex2 = NULL; > - struct ext4_extent *ex3 = NULL; > - ext4_lblk_t ee_block, eof_block; > - unsigned int allocated, ee_len, depth; > - ext4_fsblk_t newblock; > - int err = 0; > - int may_zeroout; > + ext4_lblk_t eof_block; > + ext4_lblk_t ee_block; > + struct ext4_extent *ex; > + unsigned int ee_len; > + int split_flag = 0, depth; > > ext_debug("ext4_split_unwritten_extents: inode %lu, logical" > "block %llu, max_blocks %u\n", inode->i_ino, > @@ -3101,155 +2900,20 @@ static int ext4_split_unwritten_extents(handle_t *handle, > inode->i_sb->s_blocksize_bits; > if (eof_block < map->m_lblk + map->m_len) > eof_block = map->m_lblk + map->m_len; > - > - depth = ext_depth(inode); > - ex = path[depth].p_ext; > - ee_block = le32_to_cpu(ex->ee_block); > - ee_len = ext4_ext_get_actual_len(ex); > - allocated = ee_len - (map->m_lblk - ee_block); > - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); > - > - ex2 = ex; > - orig_ex.ee_block = ex->ee_block; > - orig_ex.ee_len = cpu_to_le16(ee_len); > - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); > - > /* > * It is safe to convert extent to initialized via explicit > * zeroout only if extent is fully insde i_size or new_size. > */ > - may_zeroout = ee_block + ee_len <= eof_block; > - > - /* > - * If the uninitialized extent begins at the same logical > - * block where the write begins, and the write completely > - * covers the extent, then we don't need to split it. > - */ > - if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) > - return allocated; > - > - err = ext4_ext_get_access(handle, inode, path + depth); > - if (err) > - goto out; > - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ > - if (map->m_lblk > ee_block) { > - ex1 = ex; > - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); > - ext4_ext_mark_uninitialized(ex1); > - ex2 = &newex; > - } > - /* > - * for sanity, update the length of the ex2 extent before > - * we insert ex3, if ex1 is NULL. This is to avoid temporary > - * overlap of blocks. > - */ > - if (!ex1 && allocated > map->m_len) > - ex2->ee_len = cpu_to_le16(map->m_len); > - /* ex3: to ee_block + ee_len : uninitialised */ > - if (allocated > map->m_len) { > - unsigned int newdepth; > - ex3 = &newex; > - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); > - ext4_ext_store_pblock(ex3, newblock + map->m_len); > - ex3->ee_len = cpu_to_le16(allocated - map->m_len); > - ext4_ext_mark_uninitialized(ex3); > - err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); > - if (err == -ENOSPC && may_zeroout) { > - err = ext4_ext_zeroout(inode, &orig_ex); > - if (err) > - goto fix_extent_len; > - /* update the extent length and mark as initialized */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* zeroed the full extent */ > - /* blocks available from map->m_lblk */ > - return allocated; > - > - } else if (err) > - goto fix_extent_len; > - /* > - * The depth, and hence eh & ex might change > - * as part of the insert above. > - */ > - newdepth = ext_depth(inode); > - /* > - * update the extent length after successful insert of the > - * split extent > - */ > - ee_len -= ext4_ext_get_actual_len(ex3); > - orig_ex.ee_len = cpu_to_le16(ee_len); > - may_zeroout = ee_block + ee_len <= eof_block; > - > - depth = newdepth; > - ext4_ext_drop_refs(path); > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > - if (IS_ERR(path)) { > - err = PTR_ERR(path); > - goto out; > - } > - ex = path[depth].p_ext; > - if (ex2 != &newex) > - ex2 = ex; > + depth = ext_depth(inode); > + ex = path[depth].p_ext; > + ee_block = le32_to_cpu(ex->ee_block); > + ee_len = ext4_ext_get_actual_len(ex); > > - err = ext4_ext_get_access(handle, inode, path + depth); > - if (err) > - goto out; > + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > + split_flag |= EXT4_EXT_MARK_UNINIT2; > > - allocated = map->m_len; > - } > - /* > - * If there was a change of depth as part of the > - * insertion of ex3 above, we need to update the length > - * of the ex1 extent again here > - */ > - if (ex1 && ex1 != ex) { > - ex1 = ex; > - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); > - ext4_ext_mark_uninitialized(ex1); > - ex2 = &newex; > - } > - /* > - * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written > - * using direct I/O, uninitialised still. > - */ > - ex2->ee_block = cpu_to_le32(map->m_lblk); > - ext4_ext_store_pblock(ex2, newblock); > - ex2->ee_len = cpu_to_le16(allocated); > - ext4_ext_mark_uninitialized(ex2); > - if (ex2 != ex) > - goto insert; > - /* Mark modified extent as dirty */ > - err = ext4_ext_dirty(handle, inode, path + depth); > - ext_debug("out here\n"); > - goto out; > -insert: > - err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > - if (err == -ENOSPC && may_zeroout) { > - err = ext4_ext_zeroout(inode, &orig_ex); > - if (err) > - goto fix_extent_len; > - /* update the extent length and mark as initialized */ > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_dirty(handle, inode, path + depth); > - /* zero out the first half */ > - return allocated; > - } else if (err) > - goto fix_extent_len; > -out: > - ext4_ext_show_leaf(inode, path); > - return err ? err : allocated; > - > -fix_extent_len: > - ex->ee_block = orig_ex.ee_block; > - ex->ee_len = orig_ex.ee_len; > - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); > - ext4_ext_mark_uninitialized(ex); > - ext4_ext_dirty(handle, inode, path + depth); > - return err; > + flags |= EXT4_GET_BLOCKS_PRE_IO; > + return ext4_split_extent(handle, inode, path, map, split_flag, flags); > } > > static int ext4_convert_unwritten_extents_endio(handle_t *handle, -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html