On Aug 15, 2019, at 9:49 PM, Dongyang Li <dongyangli@xxxxxxx> wrote: > > If overhead is not recorded in the super block, it is calculated > during mount in kernel; for bigalloc file systems it takes > O(groups**2) in time. > For a 1PB device with 32K cluster size it takes ~12 mins to > mount, with most of the time spent on figuring out overhead. > > While we can not improve the overhead algorithm in kernel > due to the nature of bigalloc, we can work out the overhead > during mke2fs and set it in the super block, avoiding calculating > it every time during mounting. > > Overhead is s_first_data_block plus internal journal blocks plus > the block and inode bitmaps, inode table, super block backups and > group descriptor blocks for every group. With the patch we calculate > the overhead when converting the block bitmap to cluster bitmap. > > When bad blocks are involved, it gets tricky because the blocks > counted as overhead and the bad blocks can end up in the same > allocation cluster. In this case we will unmark the bad blocks from > the block bitmap, convert to cluster bitmap and get the overhead, > then mark the bad blocks back in the cluster bitmap. > > Fix a bug in handle_bad_blocks(): don't convert the bad block to > cluster when marking it as used, since the bitmap is still a block bitmap > and will be converted to cluster bitmap later. > > Note: in kernel the overhead is the s_overhead_clusters field from > struct ext4_super_block, it's named s_overhead_blocks in e2fsprogs. 
> > Signed-off-by: Li Dongyang <dongyangli@xxxxxxx> Reviewed-by: Andreas Dilger <adilger@xxxxxxxxx> > --- > lib/ext2fs/ext2fs.h | 4 +++ > lib/ext2fs/gen_bitmap64.c | 61 ++++++++++++++++++++++++++++++++++----- > misc/mke2fs.c | 15 ++++++++-- > 3 files changed, 69 insertions(+), 11 deletions(-) > > diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h > index 59fd9742..a70924b3 100644 > --- a/lib/ext2fs/ext2fs.h > +++ b/lib/ext2fs/ext2fs.h > @@ -1437,6 +1437,10 @@ errcode_t ext2fs_set_generic_bmap_range(ext2fs_generic_bitmap bmap, > void *in); > errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs, > ext2fs_block_bitmap *bitmap); > +errcode_t ext2fs_convert_subcluster_bitmap_overhead(ext2_filsys fs, > + ext2fs_block_bitmap *bitmap, > + badblocks_list bb_list, > + unsigned int *count); > > /* get_num_dirs.c */ > extern errcode_t ext2fs_get_num_dirs(ext2_filsys fs, ext2_ino_t *ret_num_dirs); > diff --git a/lib/ext2fs/gen_bitmap64.c b/lib/ext2fs/gen_bitmap64.c > index 97601232..0f67f9c4 100644 > --- a/lib/ext2fs/gen_bitmap64.c > +++ b/lib/ext2fs/gen_bitmap64.c > @@ -794,18 +794,46 @@ void ext2fs_warn_bitmap32(ext2fs_generic_bitmap gen_bitmap, const char *func) > #endif > } > > -errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs, > - ext2fs_block_bitmap *bitmap) > +errcode_t ext2fs_convert_subcluster_bitmap_overhead(ext2_filsys fs, > + ext2fs_block_bitmap *bitmap, > + badblocks_list bb_list, > + unsigned int *count) > { > ext2fs_generic_bitmap_64 bmap, cmap; > ext2fs_block_bitmap gen_bmap = *bitmap, gen_cmap; > errcode_t retval; > - blk64_t i, next, b_end, c_end; > + blk64_t blk, next, b_end, c_end; > + unsigned int clusters = 0; > + blk_t super_and_bgd, bblk; > + badblocks_iterate bb_iter; > + dgrp_t i; > int ratio; > > bmap = (ext2fs_generic_bitmap_64) gen_bmap; > - if (fs->cluster_ratio_bits == ext2fs_get_bitmap_granularity(gen_bmap)) > + if (fs->cluster_ratio_bits == > + ext2fs_get_bitmap_granularity(gen_bmap)) { > + if (count) { > + for (i = 0; i < 
fs->group_desc_count; i++) { > + ext2fs_super_and_bgd_loc2(fs, i, NULL, NULL, > + NULL, > + &super_and_bgd); > + clusters += super_and_bgd + > + fs->inode_blocks_per_group + 2; > + } > + *count = clusters; > + } > return 0; /* Nothing to do */ > + } > + > + if (bb_list) { > + retval = ext2fs_badblocks_list_iterate_begin(bb_list, > + &bb_iter); > + if (retval) > + return retval; > + while (ext2fs_badblocks_list_iterate(bb_iter, &bblk)) > + ext2fs_unmark_block_bitmap2(gen_bmap, bblk); > + bb_iter->ptr = 0; > + } > > retval = ext2fs_allocate_block_bitmap(fs, "converted cluster bitmap", > &gen_cmap); > @@ -813,27 +841,44 @@ errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs, > return retval; > > cmap = (ext2fs_generic_bitmap_64) gen_cmap; > - i = bmap->start; > + blk = bmap->start; > b_end = bmap->end; > bmap->end = bmap->real_end; > c_end = cmap->end; > cmap->end = cmap->real_end; > ratio = 1 << fs->cluster_ratio_bits; > - while (i < bmap->real_end) { > + while (blk < bmap->real_end) { > retval = ext2fs_find_first_set_block_bitmap2(gen_bmap, > - i, bmap->real_end, &next); > + blk, bmap->real_end, &next); > if (retval) > break; > ext2fs_mark_block_bitmap2(gen_cmap, next); > - i = bmap->start + roundup(next - bmap->start + 1, ratio); > + blk = bmap->start + roundup(next - bmap->start + 1, ratio); > + clusters++; > } > bmap->end = b_end; > cmap->end = c_end; > ext2fs_free_block_bitmap(gen_bmap); > + > + if (bb_list) { > + while (ext2fs_badblocks_list_iterate(bb_iter, &bblk)) > + ext2fs_mark_block_bitmap2(gen_cmap, bblk); > + ext2fs_badblocks_list_iterate_end(bb_iter); > + } > + > *bitmap = (ext2fs_block_bitmap) cmap; > + if (count) > + *count = clusters; > return 0; > } > > +errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs, > + ext2fs_block_bitmap *bitmap) > +{ > + return ext2fs_convert_subcluster_bitmap_overhead(fs, bitmap, > + NULL, NULL); > +} > + > errcode_t ext2fs_find_first_zero_generic_bmap(ext2fs_generic_bitmap bitmap, > __u64 start, __u64 end, 
__u64 *out) > { > diff --git a/misc/mke2fs.c b/misc/mke2fs.c > index d7cf257e..baa87b36 100644 > --- a/misc/mke2fs.c > +++ b/misc/mke2fs.c > @@ -344,7 +344,7 @@ _("Warning: the backup superblock/group descriptors at block %u contain\n" > exit(1); > } > while (ext2fs_badblocks_list_iterate(bb_iter, &blk)) > - ext2fs_mark_block_bitmap2(fs->block_map, EXT2FS_B2C(fs, blk)); > + ext2fs_mark_block_bitmap2(fs->block_map, blk); > ext2fs_badblocks_list_iterate_end(bb_iter); > } > > @@ -2913,6 +2913,7 @@ int main (int argc, char *argv[]) > ext2_filsys fs; > badblocks_list bb_list = 0; > unsigned int journal_blocks = 0; > + unsigned int overhead; > unsigned int i, checkinterval; > int max_mnt_count; > int val, hash_alg; > @@ -3213,7 +3214,9 @@ int main (int argc, char *argv[]) > if (!quiet) > printf("%s", _("done \n")); > > - retval = ext2fs_convert_subcluster_bitmap(fs, &fs->block_map); > + retval = ext2fs_convert_subcluster_bitmap_overhead(fs, &fs->block_map, > + bb_list, > + &overhead); > if (retval) { > com_err(program_name, retval, "%s", > _("\n\twhile converting subcluster bitmap")); > @@ -3317,6 +3320,7 @@ int main (int argc, char *argv[]) > free(journal_device); > } else if ((journal_size) || > ext2fs_has_feature_journal(&fs_param)) { > + overhead += EXT2FS_B2C(fs, journal_blocks); > if (super_only) { > printf("%s", _("Skipping journal creation in super-only mode\n")); > fs->super->s_journal_inum = EXT2_JOURNAL_INO; > @@ -3359,8 +3363,13 @@ no_journal: > fs->super->s_mmp_update_interval); > } > > - if (ext2fs_has_feature_bigalloc(&fs_param)) > + overhead += fs->super->s_first_data_block; > + > + if (ext2fs_has_feature_bigalloc(&fs_param)) { > fix_cluster_bg_counts(fs); > + if (!super_only) > + fs->super->s_overhead_blocks = overhead; > + } > if (ext2fs_has_feature_quota(&fs_param)) > create_quota_inodes(fs); > > -- > 2.22.1 > Cheers, Andreas
Attachment:
signature.asc
Description: Message signed with OpenPGP