On Jan 29, 2021, at 3:29 PM, Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> wrote: > > This patch adds a new file "mb_structs" which allows us to see the > largest free order lists and the serialized average fragment tree. It might make sense to split these two structs into separate files. On large filesystems, there will be millions of groups, so having to dump both structs to find out information about one or the other would be a lot of overhead. Also, while the "mb_groups" file can be large (one line per group) for millions of groups, the files added here may have millions of groups on the same line. Text processing tools are used to dealing with many lines in a file, but are relatively poor at dealing with millions of characters on a single line... It would be good to have a "summary" file that dumps general info about these structs (e.g. the number of groups at each list order, depth of the rbtree). That could be maintained easily for the c0 lists at least, since the list is locked whenever a group is added and removed. In Artem's patch to improve mballoc for large filesystems (which didn't land, but was useful anyway), there was an "mb_alloc_stats" file added: https://github.com/lustre/lustre-release/blob/master/ldiskfs/kernel_patches/patches/rhel8/ext4-simple-blockalloc.patch#L102 /proc/fs/ldiskfs/dm-2/mb_alloc_stats:mballoc: /proc/fs/ldiskfs/dm-2/mb_alloc_stats: blocks: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: reqs: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: success: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: extents_scanned: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: goal_hits: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: 2^n_hits: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: breaks: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: lost: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: useless_c1_loops: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: useless_c2_loops: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: useless_c3_loops: 163 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: skipped_c1_loops: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: skipped_c2_loops: 1230 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: skipped_c3_loops: 0 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: buddies_generated: 6305 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: buddies_time_used: 20165523 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: preallocated: 9943702 /proc/fs/ldiskfs/dm-2/mb_alloc_stats: discarded: 10506 that would still be useful to maintain, since only a few of the lines are specific to his patch ({useless,skipped}_c[123]_loops) > Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> > --- > fs/ext4/ext4.h | 1 + > fs/ext4/mballoc.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++ > fs/ext4/sysfs.c | 2 ++ > 3 files changed, 82 insertions(+) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index da12a083bf52..835e304e3113 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -2809,6 +2809,7 @@ int __init ext4_fc_init_dentry_cache(void); > > /* mballoc.c */ > extern const struct seq_operations ext4_mb_seq_groups_ops; > +extern const struct seq_operations ext4_mb_seq_structs_ops; > extern long ext4_mb_stats; > extern long ext4_mb_max_to_scan; > extern int ext4_mb_init(struct super_block *); > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index 413259477b03..ec4656903bd4 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -2741,6 +2741,85 @@ const struct seq_operations ext4_mb_seq_groups_ops = { > .show = ext4_mb_seq_groups_show, > }; > > +static void *ext4_mb_seq_structs_start(struct seq_file *seq, loff_t *pos) > +{ > + struct super_block *sb = PDE_DATA(file_inode(seq->file)); > + unsigned long position; > + > + read_lock(&EXT4_SB(sb)->s_mb_rb_lock); > + > + if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + ext4_get_groups_count(sb)) > + return NULL; > + position = *pos + 1; > + return (void *) ((unsigned long) position); > +} > + > +static void *ext4_mb_seq_structs_next(struct seq_file *seq, void *v, loff_t *pos) > +{ > + struct super_block *sb = PDE_DATA(file_inode(seq->file)); > + unsigned long position; > + > + ++*pos; > + if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + ext4_get_groups_count(sb)) > + return NULL; > + position = *pos + 1; > + return (void *) ((unsigned long) position); > +} > + > +static int ext4_mb_seq_structs_show(struct seq_file *seq, void *v) > +{ > + struct super_block *sb = PDE_DATA(file_inode(seq->file)); > + struct ext4_sb_info *sbi = EXT4_SB(sb); > + unsigned long position = ((unsigned long) v); > + struct ext4_group_info *grpinfo; > + struct rb_node *n; > + int i; > + > + position--; > + > + if (position >= MB_NUM_ORDERS(sb)) { > + position -= MB_NUM_ORDERS(sb); > + if (position == 0) > + seq_puts(seq, "Group, Avg Fragment Size\n"); > + n = rb_first(&sbi->s_mb_avg_fragment_size_root); > + for (i = 0; n && i < position; i++) > + n = rb_next(n); > + if (!n) > + return 0; > + grpinfo = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb); > + seq_printf(seq, "%d, %d\n", > + grpinfo->bb_group, > + grpinfo->bb_fragments ? grpinfo->bb_free / grpinfo->bb_fragments : 0); > + return 0; > + } > + > + if (position == 0) > + seq_puts(seq, "Largest Free Order Lists:\n"); > + > + seq_printf(seq, "[%ld]: ", position); > + list_for_each_entry(grpinfo, &sbi->s_mb_largest_free_orders[position], > + bb_largest_free_order_node) { > + seq_printf(seq, "%d ", grpinfo->bb_group); > + } > + seq_puts(seq, "\n"); > + > + return 0; > +} > + > +static void ext4_mb_seq_structs_stop(struct seq_file *seq, void *v) > +{ > + struct super_block *sb = PDE_DATA(file_inode(seq->file)); > + > + read_unlock(&EXT4_SB(sb)->s_mb_rb_lock); > +} > + > +const struct seq_operations ext4_mb_seq_structs_ops = { > + .start = ext4_mb_seq_structs_start, > + .next = ext4_mb_seq_structs_next, > + .stop = ext4_mb_seq_structs_stop, > + .show = ext4_mb_seq_structs_show, > +}; > + > static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) > { > int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; > diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c > index 4e27fe6ed3ae..828d58b98310 100644 > --- a/fs/ext4/sysfs.c > +++ b/fs/ext4/sysfs.c > @@ -527,6 +527,8 @@ int ext4_register_sysfs(struct super_block *sb) > ext4_fc_info_show, sb); > proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, > &ext4_mb_seq_groups_ops, sb); > + proc_create_seq_data("mb_structs", 0444, sbi->s_proc, > + &ext4_mb_seq_structs_ops, sb); > } > return 0; > } > -- > 2.30.0.365.g02bc693789-goog > Cheers, Andreas
Attachment:
signature.asc
Description: Message signed with OpenPGP