Re: [PATCH 3/9] readahead: record readahead patterns

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue 29-11-11 21:09:03, Wu Fengguang wrote:
> Record the readahead pattern in ra->pattern and extend the ra_submit()
> parameters, to be used by the next readahead tracing/stats patches.
> 
> 7 patterns are defined:
> 
>       	pattern			readahead for
> -----------------------------------------------------------
> 	RA_PATTERN_INITIAL	start-of-file read
> 	RA_PATTERN_SUBSEQUENT	trivial sequential read
> 	RA_PATTERN_CONTEXT	interleaved sequential read
> 	RA_PATTERN_OVERSIZE	oversize read
> 	RA_PATTERN_MMAP_AROUND	mmap fault
> 	RA_PATTERN_FADVISE	posix_fadvise()
> 	RA_PATTERN_RANDOM	random read
> 
> Note that random reads will be recorded in file_ra_state now.
> This won't deteriorate cache bouncing because the ra->prev_pos update
> in do_generic_file_read() already pollutes the data cache, and
> filemap_fault() will stop calling into us after MMAP_LOTSAMISS.
> 
> CC: Ingo Molnar <mingo@xxxxxxx>
> CC: Jens Axboe <axboe@xxxxxxxxx>
> CC: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> Acked-by: Rik van Riel <riel@xxxxxxxxxx>
> Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx>
  The patch looks OK. You can add:
Acked-by: Jan Kara <jack@xxxxxxx>

								Honza

> ---
>  include/linux/fs.h |   36 +++++++++++++++++++++++++++++++++++-
>  include/linux/mm.h |    4 +++-
>  mm/filemap.c       |    3 ++-
>  mm/readahead.c     |   29 ++++++++++++++++++++++-------
>  4 files changed, 62 insertions(+), 10 deletions(-)
> 
> --- linux-next.orig/include/linux/fs.h	2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/include/linux/fs.h	2011-11-29 10:23:41.000000000 +0800
> @@ -945,11 +945,45 @@ struct file_ra_state {
>  					   there are only # of pages ahead */
>  
>  	unsigned int ra_pages;		/* Maximum readahead window */
> -	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
> +	u16 mmap_miss;			/* Cache miss stat for mmap accesses */
> +	u8 pattern;			/* one of RA_PATTERN_* */
> +
>  	loff_t prev_pos;		/* Cache last read() position */
>  };
>  
>  /*
> + * Which policy makes decision to do the current read-ahead IO?
> + *
> + * RA_PATTERN_INITIAL		readahead window is initially opened,
> + *				normally when reading from start of file
> + * RA_PATTERN_SUBSEQUENT	readahead window is pushed forward
> + * RA_PATTERN_CONTEXT		no readahead window available, querying the
> + *				page cache to decide readahead start/size.
> + *				This typically happens on interleaved reads (eg.
> + *				reading pages 0, 1000, 1, 1001, 2, 1002, ...)
> + *				where one file_ra_state struct is not enough
> + *				for recording 2+ interleaved sequential read
> + *				streams.
> + * RA_PATTERN_MMAP_AROUND	read-around on mmap page faults
> + *				(w/o any sequential/random hints)
> + * RA_PATTERN_FADVISE		triggered by POSIX_FADV_WILLNEED or FMODE_RANDOM
> + * RA_PATTERN_OVERSIZE		a random read larger than max readahead size,
> + *				do max readahead to break down the read size
> + * RA_PATTERN_RANDOM		a small random read
> + */
> +enum readahead_pattern {
> +	RA_PATTERN_INITIAL,
> +	RA_PATTERN_SUBSEQUENT,
> +	RA_PATTERN_CONTEXT,
> +	RA_PATTERN_MMAP_AROUND,
> +	RA_PATTERN_FADVISE,
> +	RA_PATTERN_OVERSIZE,
> +	RA_PATTERN_RANDOM,
> +	RA_PATTERN_ALL,		/* for summary stats */
> +	RA_PATTERN_MAX
> +};
> +
> +/*
>   * Check if @index falls in the readahead windows.
>   */
>  static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
> --- linux-next.orig/mm/readahead.c	2011-11-28 22:24:16.000000000 +0800
> +++ linux-next/mm/readahead.c	2011-11-29 10:17:14.000000000 +0800
> @@ -249,7 +249,10 @@ unsigned long max_sane_readahead(unsigne
>   * Submit IO for the read-ahead request in file_ra_state.
>   */
>  unsigned long ra_submit(struct file_ra_state *ra,
> -		       struct address_space *mapping, struct file *filp)
> +			struct address_space *mapping,
> +			struct file *filp,
> +			pgoff_t offset,
> +			unsigned long req_size)
>  {
>  	pgoff_t eof = ((i_size_read(mapping->host)-1) >> PAGE_CACHE_SHIFT) + 1;
>  	pgoff_t start = ra->start;
> @@ -390,6 +393,7 @@ static int try_context_readahead(struct 
>  	if (size >= offset)
>  		size *= 2;
>  
> +	ra->pattern = RA_PATTERN_CONTEXT;
>  	ra->start = offset;
>  	ra->size = get_init_ra_size(size + req_size, max);
>  	ra->async_size = ra->size;
> @@ -411,8 +415,10 @@ ondemand_readahead(struct address_space 
>  	/*
>  	 * start of file
>  	 */
> -	if (!offset)
> +	if (!offset) {
> +		ra->pattern = RA_PATTERN_INITIAL;
>  		goto initial_readahead;
> +	}
>  
>  	/*
>  	 * It's the expected callback offset, assume sequential access.
> @@ -420,6 +426,7 @@ ondemand_readahead(struct address_space 
>  	 */
>  	if ((offset == (ra->start + ra->size - ra->async_size) ||
>  	     offset == (ra->start + ra->size))) {
> +		ra->pattern = RA_PATTERN_SUBSEQUENT;
>  		ra->start += ra->size;
>  		ra->size = get_next_ra_size(ra, max);
>  		ra->async_size = ra->size;
> @@ -442,6 +449,7 @@ ondemand_readahead(struct address_space 
>  		if (!start || start - offset > max)
>  			return 0;
>  
> +		ra->pattern = RA_PATTERN_CONTEXT;
>  		ra->start = start;
>  		ra->size = start - offset;	/* old async_size */
>  		ra->size += req_size;
> @@ -453,14 +461,18 @@ ondemand_readahead(struct address_space 
>  	/*
>  	 * oversize read
>  	 */
> -	if (req_size > max)
> +	if (req_size > max) {
> +		ra->pattern = RA_PATTERN_OVERSIZE;
>  		goto initial_readahead;
> +	}
>  
>  	/*
>  	 * sequential cache miss
>  	 */
> -	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
> +	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL) {
> +		ra->pattern = RA_PATTERN_INITIAL;
>  		goto initial_readahead;
> +	}
>  
>  	/*
>  	 * Query the page cache and look for the traces(cached history pages)
> @@ -471,9 +483,12 @@ ondemand_readahead(struct address_space 
>  
>  	/*
>  	 * standalone, small random read
> -	 * Read as is, and do not pollute the readahead state.
>  	 */
> -	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
> +	ra->pattern = RA_PATTERN_RANDOM;
> +	ra->start = offset;
> +	ra->size = req_size;
> +	ra->async_size = 0;
> +	goto readit;
>  
>  initial_readahead:
>  	ra->start = offset;
> @@ -491,7 +506,7 @@ readit:
>  		ra->size += ra->async_size;
>  	}
>  
> -	return ra_submit(ra, mapping, filp);
> +	return ra_submit(ra, mapping, filp, offset, req_size);
>  }
>  
>  /**
> --- linux-next.orig/include/linux/mm.h	2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/include/linux/mm.h	2011-11-28 22:24:16.000000000 +0800
> @@ -1456,7 +1456,9 @@ void page_cache_async_readahead(struct a
>  unsigned long max_sane_readahead(unsigned long nr);
>  unsigned long ra_submit(struct file_ra_state *ra,
>  			struct address_space *mapping,
> -			struct file *filp);
> +			struct file *filp,
> +			pgoff_t offset,
> +			unsigned long req_size);
>  
>  /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
>  extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
> --- linux-next.orig/mm/filemap.c	2011-11-28 21:21:05.000000000 +0800
> +++ linux-next/mm/filemap.c	2011-11-29 10:17:14.000000000 +0800
> @@ -1611,11 +1611,12 @@ static void do_sync_mmap_readahead(struc
>  	/*
>  	 * mmap read-around
>  	 */
> +	ra->pattern = RA_PATTERN_MMAP_AROUND;
>  	ra_pages = max_sane_readahead(ra->ra_pages);
>  	ra->start = max_t(long, 0, offset - ra_pages / 2);
>  	ra->size = ra_pages;
>  	ra->async_size = ra_pages / 4;
> -	ra_submit(ra, mapping, file);
> +	ra_submit(ra, mapping, file, offset, 1);
>  }
>  
>  /*
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-- 
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]