From: Amir Goldstein <amir73il@xxxxxxxxxxxx> On snapshot create, a few special blocks (i.e., the super block and group descriptors) are pre-allocated and on snapshot take, they are copied under journal_lock_updates(). This is done to avoid the recursion that would be caused by COWing these blocks after the snapshot becomes active. Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxxxxx> Signed-off-by: Yongqiang Yang <xiaoqiangnk@xxxxxxxxx> --- fs/ext4/snapshot_ctl.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 308 insertions(+), 0 deletions(-) diff --git a/fs/ext4/snapshot_ctl.c b/fs/ext4/snapshot_ctl.c index f2dbef4..9d915a9 100644 --- a/fs/ext4/snapshot_ctl.c +++ b/fs/ext4/snapshot_ctl.c @@ -299,6 +299,48 @@ int __extend_or_restart_transaction(const char *where, #define extend_or_restart_transaction_inode(handle, inode, nblocks) \ __extend_or_restart_transaction(__func__, (handle), (inode), (nblocks)) +/* + * helper function for ext4_snapshot_create(). + * places pre-allocated [d,t]ind blocks in position + * after they have been allocated as direct blocks. + */ +static inline int ext4_snapshot_shift_blocks(struct ext4_inode_info *ei, + int from, int to, int count) +{ + int i, err = -EIO; + + /* move from direct blocks range */ + BUG_ON(from < 0 || from + count > EXT4_NDIR_BLOCKS); + /* to indirect blocks range */ + BUG_ON(to < EXT4_NDIR_BLOCKS || to + count > EXT4_SNAPSHOT_N_BLOCKS); + + /* + * i_data_sem is held whenever allocating or freeing inode + * blocks. + */ + down_write(&ei->i_data_sem); + + /* + * verify that 'from' blocks are allocated + * and that 'to' blocks are not allocated. 
+ */ + for (i = 0; i < count; i++) + if (!ei->i_data[from+i] || + ei->i_data[(to+i)%EXT4_N_BLOCKS]) + goto out; + + /* + * shift 'count' blocks from position 'from' to 'to' + */ + for (i = 0; i < count; i++) { + ei->i_data[(to+i)%EXT4_N_BLOCKS] = ei->i_data[from+i]; + ei->i_data[from+i] = 0; + } + err = 0; +out: + up_write(&ei->i_data_sem); + return err; +} static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, unsigned long ino, @@ -344,6 +386,13 @@ static int ext4_snapshot_create(struct inode *inode) struct inode *active_snapshot = ext4_snapshot_has_active(sb); struct ext4_inode_info *ei = EXT4_I(inode); int i, err, ret; + int count, nind; + const long double_blocks = (1 << (2 * SNAPSHOT_ADDR_PER_BLOCK_BITS)); + struct buffer_head *bh = NULL; + struct ext4_group_desc *desc; + unsigned long ino; + struct ext4_iloc iloc; + ext4_fsblk_t bmap_blk = 0, imap_blk = 0, inode_blk = 0; ext4_fsblk_t snapshot_blocks = ext4_blocks_count(sbi->s_es); if (active_snapshot) { snapshot_debug(1, "failed to add snapshot because active " @@ -418,6 +467,140 @@ static int ext4_snapshot_create(struct inode *inode) if (err) goto out_handle; + /* small filesystems can be mapped with just 1 double indirect block */ + nind = 1; + if (snapshot_blocks > double_blocks) + /* add up to 4 triple indirect blocks to map 2^32 blocks */ + nind += ((snapshot_blocks - double_blocks) >> + (3 * SNAPSHOT_ADDR_PER_BLOCK_BITS)) + 1; + if (nind > 2 + EXT4_SNAPSHOT_EXTRA_TIND_BLOCKS) { + snapshot_debug(1, "need too many [d,t]ind blocks (%d) " + "for snapshot (%u)\n", + nind, inode->i_generation); + err = -EFBIG; + goto out_handle; + } + + err = extend_or_restart_transaction_inode(handle, inode, + nind * EXT4_DATA_TRANS_BLOCKS(sb)); + if (err) + goto out_handle; + + /* pre-allocate and zero out [d,t]ind blocks */ + for (i = 0; i < nind; i++) { + brelse(bh); + bh = ext4_getblk(handle, inode, i, SNAPMAP_WRITE, &err); + if (!bh) + break; + /* zero out indirect block and journal as dirty metadata */ + err 
= ext4_journal_get_write_access(handle, bh); + if (err) + break; + lock_buffer(bh); + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + unlock_buffer(bh); + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + break; + } + brelse(bh); + if (!bh || err) { + snapshot_debug(1, "failed to initiate [d,t]ind block (%d) " + "for snapshot (%u)\n", + i, inode->i_generation); + goto out_handle; + } + /* place pre-allocated [d,t]ind blocks in position */ + err = ext4_snapshot_shift_blocks(ei, 0, EXT4_DIND_BLOCK, nind); + if (err) { + snapshot_debug(1, "failed to move pre-allocated [d,t]ind blocks" + " for snapshot (%u)\n", + inode->i_generation); + goto out_handle; + } + + /* allocate super block and group descriptors for snapshot */ + count = sbi->s_gdb_count + 1; + err = count; + for (i = 0; err > 0 && i < count; i += err) { + err = extend_or_restart_transaction_inode(handle, inode, + EXT4_DATA_TRANS_BLOCKS(sb)); + if (err) + goto out_handle; + err = ext4_snapshot_map_blocks(handle, inode, i, count - i, + NULL, SNAPMAP_WRITE); + } + if (err <= 0) { + snapshot_debug(1, "failed to allocate super block and %d " + "group descriptor blocks for snapshot (%u)\n", + count - 1, inode->i_generation); + if (err) + err = -EIO; + goto out_handle; + } + + ino = inode->i_ino; + /* + * pre-allocate the following blocks in the new snapshot: + * - block and inode bitmap blocks of ino's block group + * - inode table block that contains ino + */ + err = extend_or_restart_transaction_inode(handle, inode, + 3 * EXT4_DATA_TRANS_BLOCKS(sb)); + if (err) + goto out_handle; + + inode_blk = ext4_get_inode_block(sb, ino, &iloc); + + bmap_blk = 0; + imap_blk = 0; + desc = ext4_get_group_desc(sb, iloc.block_group, NULL); + if (!desc) + goto next_snapshot; + + bmap_blk = ext4_block_bitmap(sb, desc); + imap_blk = ext4_inode_bitmap(sb, desc); + if (!bmap_blk || !imap_blk) + goto next_snapshot; + + count = 1; + if (imap_blk == bmap_blk + 1) + count++; + if ((count > 1) && (inode_blk 
== imap_blk + 1)) + count++; + /* try to allocate all blocks at once */ + err = ext4_snapshot_map_blocks(handle, inode, + bmap_blk, count, + NULL, SNAPMAP_WRITE); + count = err; + /* allocate remaining blocks one by one */ + if (err > 0 && count < 2) + err = ext4_snapshot_map_blocks(handle, inode, + imap_blk, 1, + NULL, + SNAPMAP_WRITE); + if (err > 0 && count < 3) + err = ext4_snapshot_map_blocks(handle, inode, + inode_blk, 1, + NULL, + SNAPMAP_WRITE); +next_snapshot: + if (!bmap_blk || !imap_blk || !inode_blk || err < 0) { +#ifdef CONFIG_EXT4_DEBUG + ext4_fsblk_t blk0 = iloc.block_group * + EXT4_BLOCKS_PER_GROUP(sb); + snapshot_debug(1, "failed to allocate block/inode bitmap " + "or inode table block of inode (%lu) " + "(%llu,%llu,%llu/%u) for snapshot (%u)\n", + ino, bmap_blk - blk0, + imap_blk - blk0, inode_blk - blk0, + iloc.block_group, inode->i_generation); +#endif + if (!err) + err = -EIO; + goto out_handle; + } snapshot_debug(1, "snapshot (%u) created\n", inode->i_generation); err = 0; out_handle: @@ -427,6 +610,68 @@ out_handle: return err; } +/* + * ext4_snapshot_copy_block() - copy block to new snapshot + * @snapshot: new snapshot to copy block to + * @bh: source buffer to be copied + * @mask: if not NULL, mask buffer data before copying to snapshot + * (used to mask block bitmap with exclude bitmap) + * @name: name of copied block to print + * @idx: index of copied block to print + * + * Called from ext4_snapshot_take() under journal_lock_updates() + * Returns snapshot buffer on success, NULL on error + */ +static struct buffer_head *ext4_snapshot_copy_block(struct inode *snapshot, + struct buffer_head *bh, const char *mask, + const char *name, unsigned long idx) +{ + struct buffer_head *sbh = NULL; + int err; + + if (!bh) + return NULL; + + sbh = ext4_getblk(NULL, snapshot, + SNAPSHOT_IBLOCK(bh->b_blocknr), + SNAPMAP_READ, &err); + + if (!sbh || sbh->b_blocknr == bh->b_blocknr) { + snapshot_debug(1, "failed to copy %s (%lu) " + "block [%llu/%llu] to 
snapshot (%u)\n", + name, idx, + SNAPSHOT_BLOCK_TUPLE(bh->b_blocknr), + snapshot->i_generation); + brelse(sbh); + return NULL; + } + + ext4_snapshot_copy_buffer(sbh, bh, mask); + + snapshot_debug(4, "copied %s (%lu) block [%llu/%llu] " + "to snapshot (%u)\n", + name, idx, + SNAPSHOT_BLOCK_TUPLE(bh->b_blocknr), + snapshot->i_generation); + return sbh; +} + +/* + * List of blocks which are copied to snapshot for every special inode. + * Keep block bitmap first and inode table block last in the list. + */ +enum copy_inode_block { + COPY_BLOCK_BITMAP, + COPY_INODE_BITMAP, + COPY_INODE_TABLE, + COPY_INODE_BLOCKS_NUM +}; + +static char *copy_inode_block_name[COPY_INODE_BLOCKS_NUM] = { + "block bitmap", + "inode bitmap", + "inode table" +}; /* * ext4_snapshot_take() makes a new snapshot file @@ -443,6 +688,12 @@ int ext4_snapshot_take(struct inode *inode) struct ext4_super_block *es = NULL; struct buffer_head *es_bh = NULL; struct buffer_head *sbh = NULL; + struct buffer_head *bhs[COPY_INODE_BLOCKS_NUM] = { NULL }; + const char *mask = NULL; + struct inode *curr_inode; + struct ext4_iloc iloc; + struct ext4_group_desc *desc; + int i; int err = -EIO; if (!sbi->s_sbh) @@ -489,6 +740,61 @@ int ext4_snapshot_take(struct inode *inode) } #endif + /* + * copy group descriptors to snapshot + */ + for (i = 0; i < sbi->s_gdb_count; i++) { + brelse(sbh); + sbh = ext4_snapshot_copy_block(inode, + sbi->s_group_desc[i], NULL, + "GDT", i); + if (!sbh) + goto out_unlockfs; + } + + curr_inode = inode; + /* + * copy the following blocks to the new snapshot: + * - block and inode bitmap blocks of curr_inode block group + * - inode table block that contains curr_inode + */ + iloc.block_group = 0; + err = ext4_get_inode_loc(curr_inode, &iloc); + brelse(bhs[COPY_INODE_TABLE]); + bhs[COPY_INODE_TABLE] = iloc.bh; + desc = ext4_get_group_desc(sb, iloc.block_group, NULL); + if (err || !desc) { + snapshot_debug(1, "failed to read inode and bitmap blocks " + "of inode (%lu)\n", curr_inode->i_ino); + 
err = err ? : -EIO; + goto out_unlockfs; + } + brelse(bhs[COPY_BLOCK_BITMAP]); + bhs[COPY_BLOCK_BITMAP] = sb_bread(sb, + ext4_block_bitmap(sb, desc)); + brelse(bhs[COPY_INODE_BITMAP]); + bhs[COPY_INODE_BITMAP] = sb_bread(sb, + ext4_inode_bitmap(sb, desc)); + err = -EIO; + for (i = 0; i < COPY_INODE_BLOCKS_NUM; i++) { + brelse(sbh); + sbh = ext4_snapshot_copy_block(inode, bhs[i], mask, + copy_inode_block_name[i], curr_inode->i_ino); + if (!sbh) + goto out_unlockfs; + mask = NULL; + } + + /* + * copy super block to snapshot and fix it + */ + lock_buffer(es_bh); + memcpy(es_bh->b_data, sbi->s_sbh->b_data, sb->s_blocksize); + set_buffer_uptodate(es_bh); + unlock_buffer(es_bh); + mark_buffer_dirty(es_bh); + sync_dirty_buffer(es_bh); + /* reset i_size and invalidate page cache */ SNAPSHOT_SET_DISABLED(inode); @@ -523,6 +829,8 @@ out_unlockfs: out_err: brelse(es_bh); brelse(sbh); + for (i = 0; i < COPY_INODE_BLOCKS_NUM; i++) + brelse(bhs[i]); return err; } -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html