Re: [PATCH 4/4] ext4: add proc files to monitor new structures

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Jan 29, 2021, at 3:29 PM, Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> wrote:
> 
> This patch adds a new file "mb_structs" which allows us to see the
> largest free order lists and the serialized average fragment tree.

It might make sense to split these two structs into separate files.
On large filesystems, there will be millions of groups, so having
to dump both structs to find out information about one or the other
would be a lot of overhead.

Also, while the "mb_groups" file can be large (one line per group)
for millions of groups, the files added here may have millions of
groups on the same line.  Text processing tools are used to dealing
with many lines in a file, but are relatively poor at dealing with
millions of characters on a single line...

It would be good to have a "summary" file that dumps general info
about these structs (e.g. the number of groups at each list order,
depth of the rbtree).  That could be maintained easily for the c0
lists at least, since the list is locked whenever a group is added
and removed.

In Artem's patch to improve mballoc for large filesystems (which
didn't land, but was useful anyway), there was an "mb_alloc_stats"
file added:

https://github.com/lustre/lustre-release/blob/master/ldiskfs/kernel_patches/patches/rhel8/ext4-simple-blockalloc.patch#L102

/proc/fs/ldiskfs/dm-2/mb_alloc_stats:mballoc:
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   blocks: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   reqs: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   success: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   extents_scanned: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:           goal_hits: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:           2^n_hits: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:           breaks: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:           lost: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   useless_c1_loops: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   useless_c2_loops: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   useless_c3_loops: 163
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   skipped_c1_loops: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   skipped_c2_loops: 1230
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   skipped_c3_loops: 0
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   buddies_generated: 6305
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   buddies_time_used: 20165523
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   preallocated: 9943702
/proc/fs/ldiskfs/dm-2/mb_alloc_stats:   discarded: 10506

that would still be useful to maintain, since only a few of the lines
are specific to his patch ({useless,skipped}_c[123]_loops)

> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx>
> ---
> fs/ext4/ext4.h    |  1 +
> fs/ext4/mballoc.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++
> fs/ext4/sysfs.c   |  2 ++
> 3 files changed, 82 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index da12a083bf52..835e304e3113 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2809,6 +2809,7 @@ int __init ext4_fc_init_dentry_cache(void);
> 
> /* mballoc.c */
> extern const struct seq_operations ext4_mb_seq_groups_ops;
> +extern const struct seq_operations ext4_mb_seq_structs_ops;
> extern long ext4_mb_stats;
> extern long ext4_mb_max_to_scan;
> extern int ext4_mb_init(struct super_block *);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 413259477b03..ec4656903bd4 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2741,6 +2741,85 @@ const struct seq_operations ext4_mb_seq_groups_ops = {
> 	.show   = ext4_mb_seq_groups_show,
> };
> 
> +static void *ext4_mb_seq_structs_start(struct seq_file *seq, loff_t *pos)
> +{
> +	struct super_block *sb = PDE_DATA(file_inode(seq->file));
> +	unsigned long position;
> +
> +	read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
> +
> +	if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + ext4_get_groups_count(sb))
> +		return NULL;
> +	position = *pos + 1;
> +	return (void *) ((unsigned long) position);
> +}
> +
> +static void *ext4_mb_seq_structs_next(struct seq_file *seq, void *v, loff_t *pos)
> +{
> +	struct super_block *sb = PDE_DATA(file_inode(seq->file));
> +	unsigned long position;
> +
> +	++*pos;
> +	if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + ext4_get_groups_count(sb))
> +		return NULL;
> +	position = *pos + 1;
> +	return (void *) ((unsigned long) position);
> +}
> +
> +static int ext4_mb_seq_structs_show(struct seq_file *seq, void *v)
> +{
> +	struct super_block *sb = PDE_DATA(file_inode(seq->file));
> +	struct ext4_sb_info *sbi = EXT4_SB(sb);
> +	unsigned long position = ((unsigned long) v);
> +	struct ext4_group_info *grpinfo;
> +	struct rb_node *n;
> +	int i;
> +
> +	position--;
> +
> +	if (position >= MB_NUM_ORDERS(sb)) {
> +		position -= MB_NUM_ORDERS(sb);
> +		if (position == 0)
> +			seq_puts(seq, "Group, Avg Fragment Size\n");
> +		n = rb_first(&sbi->s_mb_avg_fragment_size_root);
> +		for (i = 0; n && i < position; i++)
> +			n = rb_next(n);
> +		if (!n)
> +			return 0;
> +		grpinfo = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
> +		seq_printf(seq, "%d, %d\n",
> +			   grpinfo->bb_group,
> +			   grpinfo->bb_fragments ? grpinfo->bb_free / grpinfo->bb_fragments : 0);
> +		return 0;
> +	}
> +
> +	if (position == 0)
> +		seq_puts(seq, "Largest Free Order Lists:\n");
> +
> +	seq_printf(seq, "[%ld]: ", position);
> +	list_for_each_entry(grpinfo, &sbi->s_mb_largest_free_orders[position],
> +			    bb_largest_free_order_node)	{
> +		seq_printf(seq, "%d ", grpinfo->bb_group);
> +	}
> +	seq_puts(seq, "\n");
> +
> +	return 0;
> +}
> +
> +static void ext4_mb_seq_structs_stop(struct seq_file *seq, void *v)
> +{
> +	struct super_block *sb = PDE_DATA(file_inode(seq->file));
> +
> +	read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
> +}
> +
> +const struct seq_operations ext4_mb_seq_structs_ops = {
> +	.start  = ext4_mb_seq_structs_start,
> +	.next   = ext4_mb_seq_structs_next,
> +	.stop   = ext4_mb_seq_structs_stop,
> +	.show   = ext4_mb_seq_structs_show,
> +};
> +
> static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
> {
> 	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 4e27fe6ed3ae..828d58b98310 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -527,6 +527,8 @@ int ext4_register_sysfs(struct super_block *sb)
> 					ext4_fc_info_show, sb);
> 		proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
> 				&ext4_mb_seq_groups_ops, sb);
> +		proc_create_seq_data("mb_structs", 0444, sbi->s_proc,
> +				&ext4_mb_seq_structs_ops, sb);
> 	}
> 	return 0;
> }
> --
> 2.30.0.365.g02bc693789-goog
> 


Cheers, Andreas





Attachment: signature.asc
Description: Message signed with OpenPGP


[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux