From: Amir Goldstein <amir73il@xxxxxxxxxxxx> Ensure there is enough disk space for snapshot file future use. Reserve disk space on snapshot take based on file system overhead size, number of directories and number of blocks/inodes in use. Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxxxxx> Signed-off-by: Yongqiang Yang <xiaoqiangnk@xxxxxxxxx> --- fs/ext4/balloc.c | 25 +++++++++++++++++++++++++ fs/ext4/ext4.h | 2 ++ fs/ext4/mballoc.c | 6 ++++++ fs/ext4/snapshot_ctl.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/super.c | 16 +++++++++++++++- 5 files changed, 92 insertions(+), 1 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8f1803f..1c140e4 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -372,6 +372,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) { s64 free_blocks, dirty_blocks, root_blocks; + ext4_fsblk_t snapshot_r_blocks; + handle_t *handle = journal_current_handle(); struct percpu_counter *fbc = &sbi->s_freeblocks_counter; struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; @@ -379,6 +381,29 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) dirty_blocks = percpu_counter_read_positive(dbc); root_blocks = ext4_r_blocks_count(sbi->s_es); + if (ext4_snapshot_active(sbi)) { + if (unlikely(free_blocks < (nblocks + dirty_blocks))) + /* sorry, but we're really out of space */ + return 0; + if (handle && unlikely(IS_COWING(handle))) + /* any available space may be used by COWing task */ + return 1; + /* reserve blocks for active snapshot */ + snapshot_r_blocks = + le64_to_cpu(sbi->s_es->s_snapshot_r_blocks_count); + /* + * The last snapshot_r_blocks are reserved for active snapshot + * and may not be allocated even by root. + */ + if (free_blocks < (nblocks + dirty_blocks + snapshot_r_blocks)) + return 0; + /* + * Mortal users must reserve blocks for both snapshot and + * root user. 
+ */ + root_blocks += snapshot_r_blocks; + } + if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { free_blocks = percpu_counter_sum_positive(fbc); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 198d7d4..8d82125 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1963,6 +1963,8 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, struct ext4_group_desc *gdp); extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, struct ext4_group_desc *gdp); +struct kstatfs; +extern int ext4_statfs_sb(struct super_block *sb, struct kstatfs *buf); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6e4d960..899c12c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4296,10 +4296,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, return 0; } reserv_blks = ar->len; + if (unlikely(ar->flags & EXT4_MB_HINT_COWING)) { + /* don't fail when allocating blocks for COW */ + dquot_alloc_block_nofail(ar->inode, ar->len); + goto nofail; + } while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { ar->flags |= EXT4_MB_HINT_NOPREALLOC; ar->len--; } +nofail: inquota = ar->len; if (ar->len == 0) { *errp = -EDQUOT; diff --git a/fs/ext4/snapshot_ctl.c b/fs/ext4/snapshot_ctl.c index 360581d..a610025 100644 --- a/fs/ext4/snapshot_ctl.c +++ b/fs/ext4/snapshot_ctl.c @@ -711,6 +711,8 @@ int ext4_snapshot_take(struct inode *inode) int fixing = 0; int i; int err = -EIO; + u64 snapshot_r_blocks; + struct kstatfs statfs; if (!sbi->s_sbh) goto out_err; @@ -739,6 +741,47 @@ int ext4_snapshot_take(struct inode *inode) } err = -EIO; + /* update fs statistics to calculate snapshot reserved space */ + if (ext4_statfs_sb(sb, &statfs)) { + snapshot_debug(1, "failed to statfs before snapshot (%u) " + "take\n", inode->i_generation); + goto out_err; + } + /* + * Estimate maximum disk space for snapshot file metadata based on: + * 1 indirect 
block per 1K fs blocks (to map moved data blocks) + * +1 data block per 1K fs blocks (to copy indirect blocks) + * +1 data block per fs meta block (to copy meta blocks) + * +1 data block per directory (to copy small directory index blocks) + * +1 data block per X inodes (to copy large directory index blocks) + * + * We estimate no. of dir blocks from no. of allocated inodes, assuming + * an avg. dir record size of 64 bytes. This assumption can break in + * 2 cases: + * 1. long file names (in avg.) + * 2. large no. of hard links (many dir records for the same inode) + * + * Underestimation can lead to potential ENOSPC during COW, which + * will trigger an ext4_error(). Hopefully, error behavior is set to + * remount-ro, so snapshot will not be corrupted. + * + * XXX: reserved space may be too small in data journaling mode, + * which is currently not supported. + */ +#define AVG_DIR_RECORD_SIZE_BITS 6 /* 64 bytes */ +#define AVG_INODES_PER_DIR_BLOCK \ + (SNAPSHOT_BLOCK_SIZE_BITS - AVG_DIR_RECORD_SIZE_BITS) + snapshot_r_blocks = 2 * (statfs.f_blocks >> + SNAPSHOT_ADDR_PER_BLOCK_BITS) + + statfs.f_spare[0] + statfs.f_spare[1] + + ((statfs.f_files - statfs.f_ffree) >> + AVG_INODES_PER_DIR_BLOCK); + + /* verify enough free space before taking the snapshot */ + if (statfs.f_bfree < snapshot_r_blocks) { + err = -ENOSPC; + goto out_err; + } /* * flush journal to disk and clear the RECOVER flag @@ -876,6 +919,7 @@ next_inode: goto out_unlockfs; /* set as on-disk active snapshot */ + sbi->s_es->s_snapshot_r_blocks_count = cpu_to_le64(snapshot_r_blocks); sbi->s_es->s_snapshot_id = cpu_to_le32(le32_to_cpu(sbi->s_es->s_snapshot_id) + 1); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dbe5651..a7be485 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4515,7 +4515,11 @@ restore_opts: static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct super_block *sb = dentry->d_sb; + return ext4_statfs_sb(dentry->d_sb, buf); +} + +int ext4_statfs_sb(struct 
super_block *sb, struct kstatfs *buf) +{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; u64 fsid; @@ -4567,6 +4571,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; + if (ext4_snapshot_active(sbi)) { + if (buf->f_bfree < ext4_r_blocks_count(es) + + le64_to_cpu(es->s_snapshot_r_blocks_count)) + buf->f_bavail = 0; + else + buf->f_bavail -= + le64_to_cpu(es->s_snapshot_r_blocks_count); + } + buf->f_spare[0] = percpu_counter_sum_positive(&sbi->s_dirs_counter); + buf->f_spare[1] = sbi->s_overhead_last; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); buf->f_namelen = EXT4_NAME_LEN; -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html