Added to patch queue 在 2008-08-27三的 20:58 +0530,Aneesh Kumar K.V写道: > This patch adds dirty block accounting using percpu_counters. > Delayed allocation block reservation is now done by updating > the dirty block counter. In a later patch we switch to non-delalloc > mode if the number of free blocks in the filesystem is > less than 150% of the total number of dirty blocks in the filesystem. > Reviewed-by: Mingming Cao <cmm@xxxxxxxxxx> > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx> > --- > fs/ext4/balloc.c | 59 +++++++++++++++++++++++++++++++++------------------- > fs/ext4/ext4_sb.h | 1 + > fs/ext4/inode.c | 22 +++++++++--------- > fs/ext4/mballoc.c | 17 ++------------ > fs/ext4/super.c | 8 ++++++- > 5 files changed, 59 insertions(+), 48 deletions(-) > > diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c > index 5767332..b19346a 100644 > --- a/fs/ext4/balloc.c > +++ b/fs/ext4/balloc.c > @@ -1605,26 +1605,38 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, > int ext4_claim_free_blocks(struct ext4_sb_info *sbi, > ext4_fsblk_t nblocks) > { > - s64 free_blocks; > + s64 free_blocks, dirty_blocks; > ext4_fsblk_t root_blocks = 0; > struct percpu_counter *fbc = &sbi->s_freeblocks_counter; > + struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; > > - free_blocks = percpu_counter_read(fbc); > + free_blocks = percpu_counter_read_positive(fbc); > + dirty_blocks = percpu_counter_read_positive(dbc); > > if (!capable(CAP_SYS_RESOURCE) && > sbi->s_resuid != current->fsuid && > (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) > root_blocks = ext4_r_blocks_count(sbi->s_es); > > - if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) > - free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter); > - > - if (free_blocks < (root_blocks + nblocks)) > + if (free_blocks - (nblocks + root_blocks + dirty_blocks) < > + EXT4_FREEBLOCKS_WATERMARK) { > + free_blocks = percpu_counter_sum(fbc); > + dirty_blocks = percpu_counter_sum(dbc); > + if (dirty_blocks < 0) { > 
+ printk(KERN_CRIT "Dirty block accounting " > + "went wrong %lld\n", > + dirty_blocks); > + } > + } > + /* Check whether we have space after > + * accounting for current dirty blocks > + */ > + if (free_blocks < ((s64)(root_blocks + nblocks) + dirty_blocks)) > /* we don't have free space */ > return -ENOSPC; > > - /* reduce fs free blocks counter */ > - percpu_counter_sub(fbc, nblocks); > + /* Add the blocks to nblocks */ > + percpu_counter_add(dbc, nblocks); > return 0; > } > > @@ -1640,23 +1652,28 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, > ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, > ext4_fsblk_t nblocks) > { > - ext4_fsblk_t free_blocks; > + ext4_fsblk_t free_blocks, dirty_blocks; > ext4_fsblk_t root_blocks = 0; > + struct percpu_counter *fbc = &sbi->s_freeblocks_counter; > + struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; > > - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); > + free_blocks = percpu_counter_read_positive(fbc); > + dirty_blocks = percpu_counter_read_positive(dbc); > > if (!capable(CAP_SYS_RESOURCE) && > sbi->s_resuid != current->fsuid && > (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) > root_blocks = ext4_r_blocks_count(sbi->s_es); > > - if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) > - free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); > - > - if (free_blocks <= root_blocks) > + if (free_blocks - (nblocks + root_blocks + dirty_blocks) < > + EXT4_FREEBLOCKS_WATERMARK) { > + free_blocks = percpu_counter_sum_positive(fbc); > + dirty_blocks = percpu_counter_sum_positive(dbc); > + } > + if (free_blocks <= (root_blocks + dirty_blocks)) > /* we don't have free space */ > return 0; > - if (free_blocks - root_blocks < nblocks) > + if (free_blocks - (root_blocks + dirty_blocks) < nblocks) > return free_blocks - root_blocks; > return nblocks; > } > @@ -1943,13 +1960,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode 
*inode, > le16_add_cpu(&gdp->bg_free_blocks_count, -num); > gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); > spin_unlock(sb_bgl_lock(sbi, group_no)); > - if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) { > - /* > - * we allocated less blocks than we > - * claimed. Add the difference back. > - */ > - percpu_counter_add(&sbi->s_freeblocks_counter, *count - num); > - } > + percpu_counter_sub(&sbi->s_freeblocks_counter, num); > + /* > + * Now reduce the dirty block count also. Should not go negative > + */ > + percpu_counter_sub(&sbi->s_dirtyblocks_counter, num); > if (sbi->s_log_groups_per_flex) { > ext4_group_t flex_group = ext4_flex_group(sbi, group_no); > spin_lock(sb_bgl_lock(sbi, flex_group)); > diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h > index 6300226..0fa3762 100644 > --- a/fs/ext4/ext4_sb.h > +++ b/fs/ext4/ext4_sb.h > @@ -59,6 +59,7 @@ struct ext4_sb_info { > struct percpu_counter s_freeblocks_counter; > struct percpu_counter s_freeinodes_counter; > struct percpu_counter s_dirs_counter; > + struct percpu_counter s_dirtyblocks_counter; > struct blockgroup_lock s_blockgroup_lock; > > /* root of the per fs reservation window tree */ > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 98a998b..14ec7d1 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -1030,19 +1030,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) > BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); > mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; > > - /* Account for allocated meta_blocks */ > - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; > - > - /* update fs free blocks counter for truncate case */ > - percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); > + if (mdb_free) { > + /* Account for allocated meta_blocks */ > + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; > + > + /* update fs dirty blocks counter */ > + percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); > + 
EXT4_I(inode)->i_allocated_meta_blocks = 0; > + EXT4_I(inode)->i_reserved_meta_blocks = mdb; > + } > > /* update per-inode reservations */ > BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); > EXT4_I(inode)->i_reserved_data_blocks -= used; > > - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); > - EXT4_I(inode)->i_reserved_meta_blocks = mdb; > - EXT4_I(inode)->i_allocated_meta_blocks = 0; > spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); > } > > @@ -1588,8 +1589,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) > > release = to_free + mdb_free; > > - /* update fs free blocks counter for truncate case */ > - percpu_counter_add(&sbi->s_freeblocks_counter, release); > + /* update fs dirty blocks counter for truncate case */ > + percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); > > /* update per-inode reservations */ > BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); > @@ -2471,7 +2472,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, > index = pos >> PAGE_CACHE_SHIFT; > from = pos & (PAGE_CACHE_SIZE - 1); > to = from + len; > - > retry: > /* > * With delayed allocation, we don't log the i_disksize update > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index 419009f..4da4b9a 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -2971,22 +2971,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, > le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); > gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); > spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); > - > + percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); > /* > - * free blocks account has already be reduced/reserved > - * at write_begin() time for delayed allocation > - * do not double accounting > + * Now reduce the dirty block count also. 
Should not go negative > */ > - if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) && > - ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) { > - /* > - * we allocated less blocks than we calimed > - * Add the difference back > - */ > - percpu_counter_add(&sbi->s_freeblocks_counter, > - ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len); > - } > - > + percpu_counter_sub(&sbi->s_dirtyblocks_counter, ac->ac_b_ex.fe_len); > if (sbi->s_log_groups_per_flex) { > ext4_group_t flex_group = ext4_flex_group(sbi, > ac->ac_b_ex.fe_group); > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index ed77786..7b9db51 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -520,6 +520,7 @@ static void ext4_put_super(struct super_block *sb) > percpu_counter_destroy(&sbi->s_freeblocks_counter); > percpu_counter_destroy(&sbi->s_freeinodes_counter); > percpu_counter_destroy(&sbi->s_dirs_counter); > + percpu_counter_destroy(&sbi->s_dirtyblocks_counter); > brelse(sbi->s_sbh); > #ifdef CONFIG_QUOTA > for (i = 0; i < MAXQUOTAS; i++) > @@ -2259,6 +2260,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) > err = percpu_counter_init(&sbi->s_dirs_counter, > ext4_count_dirs(sb)); > } > + if (!err) { > + err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); > + } > if (err) { > printk(KERN_ERR "EXT4-fs: insufficient memory\n"); > goto failed_mount3; > @@ -2491,6 +2495,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) > percpu_counter_destroy(&sbi->s_freeblocks_counter); > percpu_counter_destroy(&sbi->s_freeinodes_counter); > percpu_counter_destroy(&sbi->s_dirs_counter); > + percpu_counter_destroy(&sbi->s_dirtyblocks_counter); > failed_mount2: > for (i = 0; i < db_count; i++) > brelse(sbi->s_group_desc[i]); > @@ -3164,7 +3169,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) > buf->f_type = EXT4_SUPER_MAGIC; > buf->f_bsize = sb->s_blocksize; > buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; > - buf->f_bfree = 
percpu_counter_sum_positive(&sbi->s_freeblocks_counter); > + buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - > + percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); > ext4_free_blocks_count_set(es, buf->f_bfree); > buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); > if (buf->f_bfree < ext4_r_blocks_count(es)) -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html