Re: [PATCH 4/4] ext4: add prefetch_block_bitmaps mount option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Jul 31, 2020, at 1:08 PM, Theodore Ts'o <tytso@xxxxxxx> wrote:
> 
> For file systems where we can afford to keep the buddy bitmaps cached,
> we can speed up initial writes to large file systems by starting to
> load the block allocation bitmaps as soon as the file system is
> mounted.  This won't work well for _super_ large file systems, or
> memory-constrained systems, so we only enable this when it is
> requested via a mount option.
> 
> Addresses-Google-Bug: 159488342
> Signed-off-by: Theodore Ts'o <tytso@xxxxxxx>

Reviewed-by: Andreas Dilger <adilger@xxxxxxxxx>

> ---
> fs/ext4/ext4.h              | 15 +++++++++-
> fs/ext4/mballoc.c           | 10 +++----
> fs/ext4/super.c             | 59 +++++++++++++++++++++++++++----------
> include/trace/events/ext4.h | 44 +++++++++++++++++++++++++++
> 4 files changed, 105 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7451662e092a..4df6f429de1a 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1172,6 +1172,7 @@ struct ext4_inode_info {
> #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
> #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
> #define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
> +#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
> #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
> #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
> #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
> @@ -2315,9 +2316,15 @@ struct ext4_lazy_init {
> 	struct mutex		li_list_mtx;
> };
> 
> +enum ext4_li_mode {
> +	EXT4_LI_MODE_PREFETCH_BBITMAP,
> +	EXT4_LI_MODE_ITABLE,
> +};
> +
> struct ext4_li_request {
> 	struct super_block	*lr_super;
> -	struct ext4_sb_info	*lr_sbi;
> +	enum ext4_li_mode	lr_mode;
> +	ext4_group_t		lr_first_not_zeroed;
> 	ext4_group_t		lr_next_group;
> 	struct list_head	lr_request;
> 	unsigned long		lr_next_sched;
> @@ -2657,6 +2664,12 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
> extern void ext4_discard_preallocations(struct inode *);
> extern int __init ext4_init_mballoc(void);
> extern void ext4_exit_mballoc(void);
> +extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
> +				     ext4_group_t group,
> +				     unsigned int nr, int *cnt);
> +extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +				  unsigned int nr);
> +
> extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
> 			     struct buffer_head *bh, ext4_fsblk_t block,
> 			     unsigned long count, int flags);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index b1ef35a9e9f1..47de61e44db2 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2233,9 +2233,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
>  * Start prefetching @nr block bitmaps starting at @group.
>  * Return the next group which needs to be prefetched.
>  */
> -static ext4_group_t
> -ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> -		 unsigned int nr, int *cnt)
> +ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> +			      unsigned int nr, int *cnt)
> {
> 	ext4_group_t ngroups = ext4_get_groups_count(sb);
> 	struct buffer_head *bh;
> @@ -2285,9 +2284,8 @@ ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
>  * waiting for the block allocation bitmap read to finish when
>  * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
>  */
> -static void
> -ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> -		      unsigned int nr)
> +void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +			   unsigned int nr)
> {
> 	while (nr-- > 0) {
> 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..51e91a220ea9 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1521,6 +1521,7 @@ enum {
> 	Opt_dioread_nolock, Opt_dioread_lock,
> 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
> 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
> +	Opt_prefetch_block_bitmaps,
> };
> 
> static const match_table_t tokens = {
> @@ -1612,6 +1613,7 @@ static const match_table_t tokens = {
> 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> 	{Opt_nombcache, "nombcache"},
> 	{Opt_nombcache, "no_mbcache"},	/* for backward compatibility */
> +	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
> 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
> 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
> 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
> @@ -1829,6 +1831,8 @@ static const struct mount_opts {
> 	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
> 	{Opt_test_dummy_encryption, 0, MOPT_STRING},
> 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
> +	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
> +	 MOPT_SET},
> 	{Opt_err, 0, 0}
> };
> 
> @@ -3201,15 +3205,34 @@ static void print_daily_error_info(struct timer_list *t)
> static int ext4_run_li_request(struct ext4_li_request *elr)
> {
> 	struct ext4_group_desc *gdp = NULL;
> -	ext4_group_t group, ngroups;
> -	struct super_block *sb;
> +	struct super_block *sb = elr->lr_super;
> +	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
> +	ext4_group_t group = elr->lr_next_group;
> 	unsigned long timeout = 0;
> +	unsigned int prefetch_ios = 0;
> 	int ret = 0;
> 
> -	sb = elr->lr_super;
> -	ngroups = EXT4_SB(sb)->s_groups_count;
> +	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
> +		elr->lr_next_group = ext4_mb_prefetch(sb, group,
> +				EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
> +		if (prefetch_ios)
> +			ext4_mb_prefetch_fini(sb, elr->lr_next_group,
> +					      prefetch_ios);
> +		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
> +					    prefetch_ios);
> +		if (group >= elr->lr_next_group) {
> +			ret = 1;
> +			if (elr->lr_first_not_zeroed != ngroups &&
> +			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
> +				elr->lr_next_group = elr->lr_first_not_zeroed;
> +				elr->lr_mode = EXT4_LI_MODE_ITABLE;
> +				ret = 0;
> +			}
> +		}
> +		return ret;
> +	}
> 
> -	for (group = elr->lr_next_group; group < ngroups; group++) {
> +	for (; group < ngroups; group++) {
> 		gdp = ext4_get_group_desc(sb, group, NULL);
> 		if (!gdp) {
> 			ret = 1;
> @@ -3227,9 +3250,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
> 		timeout = jiffies;
> 		ret = ext4_init_inode_table(sb, group,
> 					    elr->lr_timeout ? 0 : 1);
> +		trace_ext4_lazy_itable_init(sb, group);
> 		if (elr->lr_timeout == 0) {
> 			timeout = (jiffies - timeout) *
> -				  elr->lr_sbi->s_li_wait_mult;
> +				EXT4_SB(elr->lr_super)->s_li_wait_mult;
> 			elr->lr_timeout = timeout;
> 		}
> 		elr->lr_next_sched = jiffies + elr->lr_timeout;
> @@ -3244,15 +3268,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
>  */
> static void ext4_remove_li_request(struct ext4_li_request *elr)
> {
> -	struct ext4_sb_info *sbi;
> -
> 	if (!elr)
> 		return;
> 
> -	sbi = elr->lr_sbi;
> -
> 	list_del(&elr->lr_request);
> -	sbi->s_li_request = NULL;
> +	EXT4_SB(elr->lr_super)->s_li_request = NULL;
> 	kfree(elr);
> }
> 
> @@ -3461,7 +3481,6 @@ static int ext4_li_info_new(void)
> static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> 					    ext4_group_t start)
> {
> -	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr;
> 
> 	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
> @@ -3469,8 +3488,13 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> 		return NULL;
> 
> 	elr->lr_super = sb;
> -	elr->lr_sbi = sbi;
> -	elr->lr_next_group = start;
> +	elr->lr_first_not_zeroed = start;
> +	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
> +		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +	else {
> +		elr->lr_mode = EXT4_LI_MODE_ITABLE;
> +		elr->lr_next_group = start;
> +	}
> 
> 	/*
> 	 * Randomize first schedule time of the request to
> @@ -3488,6 +3512,7 @@ int ext4_register_li_request(struct super_block *sb,
> 	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr = NULL;
> 	ext4_group_t ngroups = sbi->s_groups_count;
> +	enum ext4_li_mode lr_mode = EXT4_LI_MODE_ITABLE;
> 	int ret = 0;
> 
> 	mutex_lock(&ext4_li_mtx);
> @@ -3500,8 +3525,10 @@ int ext4_register_li_request(struct super_block *sb,
> 		goto out;
> 	}
> 
> -	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
> -	    !test_opt(sb, INIT_INODE_TABLE))
> +	if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
> +		lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +	} else if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
> +		   !test_opt(sb, INIT_INODE_TABLE))
> 		goto out;
> 
> 	elr = ext4_li_request_new(sb, first_not_zeroed);
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index cbcd2e1a608d..8008d2e116b9 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2742,6 +2742,50 @@ TRACE_EVENT(ext4_error,
> 		  __entry->function, __entry->line)
> );
> 
> +TRACE_EVENT(ext4_prefetch_bitmaps,
> +	    TP_PROTO(struct super_block *sb, ext4_group_t group,
> +		     ext4_group_t next, unsigned int prefetch_ios),
> +
> +	TP_ARGS(sb, group, next, prefetch_ios),
> +
> +	TP_STRUCT__entry(
> +		__field(	dev_t,	dev			)
> +		__field(	__u32,	group			)
> +		__field(	__u32,	next			)
> +		__field(	__u32,	ios			)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->dev	= sb->s_dev;
> +		__entry->group	= group;
> +		__entry->next	= next;
> +		__entry->ios	= prefetch_ios;
> +	),
> +
> +	TP_printk("dev %d,%d group %u next %u ios %u",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> +		  __entry->group, __entry->next, __entry->ios)
> +);
> +
> +TRACE_EVENT(ext4_lazy_itable_init,
> +	    TP_PROTO(struct super_block *sb, ext4_group_t group),
> +
> +	TP_ARGS(sb, group),
> +
> +	TP_STRUCT__entry(
> +		__field(	dev_t,	dev			)
> +		__field(	__u32,	group			)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->dev	= sb->s_dev;
> +		__entry->group	= group;
> +	),
> +
> +	TP_printk("dev %d,%d group %u",
> +		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
> +);
> +
> #endif /* _TRACE_EXT4_H */
> 
> /* This part must be outside protection */
> --
> 2.24.1
> 


Cheers, Andreas





Attachment: signature.asc
Description: Message signed with OpenPGP


[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux