The patch titled readahead: state based method - routines has been added to the -mm tree. Its filename is readahead-state-based-method-routines.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: readahead: state based method - routines From: Wu Fengguang <wfg@xxxxxxxxxxxxxxxx> Extend struct file_ra_state to support the adaptive read-ahead logic. Also define some helpers for it. Signed-off-by: Wu Fengguang <wfg@xxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- include/linux/fs.h | 57 ++++++++++-- mm/readahead.c | 189 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 234 insertions(+), 12 deletions(-) diff -puN include/linux/fs.h~readahead-state-based-method-routines include/linux/fs.h --- 25/include/linux/fs.h~readahead-state-based-method-routines Fri May 26 13:34:10 2006 +++ 25-akpm/include/linux/fs.h Fri May 26 13:34:10 2006 @@ -613,21 +613,58 @@ struct fown_struct { /* * Track a single file's readahead state + * + * Diagram for the adaptive readahead logic: + * + * |--------- old chunk ------->|-------------- new chunk -------------->| + * +----------------------------+----------------------------------------+ + * | # | # | + * +----------------------------+----------------------------------------+ + * ^ ^ ^ ^ + * file_ra_state.la_index .ra_index .lookahead_index .readahead_index + * + * Deduced sizes: + * |----------- readahead size ------------>| + * +----------------------------+----------------------------------------+ + * | # | # | + * +----------------------------+----------------------------------------+ + * |------- invoke interval ------>|-- lookahead size -->| */ struct file_ra_state { - unsigned long start; /* Current window */ - unsigned long size; - unsigned long flags; /* ra flags RA_FLAG_xxx*/ - unsigned long cache_hit; /* cache hit count*/ - unsigned long prev_page; /* Cache last read() position */ - unsigned long ahead_start; /* Ahead window */ - unsigned long ahead_size; - unsigned long ra_pages; /* Maximum readahead window */ - unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ - unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ + union { + struct { /* conventional read-ahead */ + unsigned long start; /* Current window */ + unsigned long size; + unsigned long ahead_start; /* Ahead window */ + unsigned long ahead_size; + unsigned long cache_hit; /* cache hit count */ + }; +#ifdef CONFIG_ADAPTIVE_READAHEAD + struct { /* adaptive read-ahead */ + pgoff_t la_index; + pgoff_t ra_index; + pgoff_t lookahead_index; + pgoff_t readahead_index; + unsigned long age; + uint64_t cache_hits; + }; +#endif + }; + + /* mmap read-around */ + unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ + unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ + + /* common ones */ + unsigned long flags; /* ra flags RA_FLAG_xxx*/ + unsigned long prev_page; /* Cache last read() position */ + unsigned long ra_pages; /* Maximum readahead window */ }; #define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */ #define RA_FLAG_INCACHE 0x02 /* file is already in cache */ +#define RA_FLAG_MMAP (1UL<<31) /* mmaped page access */ +#define RA_FLAG_NO_LOOKAHEAD (1UL<<30) /* disable look-ahead */ +#define RA_FLAG_EOF (1UL<<29) /* readahead hits EOF */ struct file { /* diff -puN mm/readahead.c~readahead-state-based-method-routines mm/readahead.c --- 25/mm/readahead.c~readahead-state-based-method-routines Fri May 26 13:34:10 2006 +++ 25-akpm/mm/readahead.c Fri May 26 13:34:10 2006 @@ -832,6 +832,191 @@ static unsigned long node_readahead_agin } /* + * Some helpers for querying/building a read-ahead request. + * + * Diagram for some variable names used frequently: + * + * |<------- la_size ------>| + * +-----------------------------------------+ + * | # | + * +-----------------------------------------+ + * ra_index -->|<---------------- ra_size -------------->| + * + */ + +static enum ra_class ra_class_new(struct file_ra_state *ra) +{ + return ra->flags & RA_CLASS_MASK; +} + +static inline enum ra_class ra_class_old(struct file_ra_state *ra) +{ + return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK; +} + +static unsigned long ra_readahead_size(struct file_ra_state *ra) +{ + return ra->readahead_index - ra->ra_index; +} + +static unsigned long ra_lookahead_size(struct file_ra_state *ra) +{ + return ra->readahead_index - ra->lookahead_index; +} + +static unsigned long ra_invoke_interval(struct file_ra_state *ra) +{ + return ra->lookahead_index - ra->la_index; +} + +/* + * The 64bit cache_hits stores three accumulated values and a counter value. + * MSB LSB + * 3333333333333333 : 2222222222222222 : 1111111111111111 : 0000000000000000 + */ +static int ra_cache_hit(struct file_ra_state *ra, int nr) +{ + return (ra->cache_hits >> (nr * 16)) & 0xFFFF; +} + +/* + * Conceptual code: + * ra_cache_hit(ra, 1) += ra_cache_hit(ra, 0); + * ra_cache_hit(ra, 0) = 0; + */ +static void ra_addup_cache_hit(struct file_ra_state *ra) +{ + int n; + + n = ra_cache_hit(ra, 0); + ra->cache_hits -= n; + n <<= 16; + ra->cache_hits += n; +} + +/* + * The read-ahead is deemed success if cache-hit-rate >= 1/readahead_hit_rate. + */ +static int ra_cache_hit_ok(struct file_ra_state *ra) +{ + return ra_cache_hit(ra, 0) * readahead_hit_rate >= + (ra->lookahead_index - ra->la_index); +} + +/* + * Check if @index falls in the @ra request. + */ +static int ra_has_index(struct file_ra_state *ra, pgoff_t index) +{ + if (index < ra->la_index || index >= ra->readahead_index) + return 0; + + if (index >= ra->ra_index) + return 1; + else + return -1; +} + +/* + * Which method is issuing this read-ahead? + */ +static void ra_set_class(struct file_ra_state *ra, + enum ra_class ra_class) +{ + unsigned long flags_mask; + unsigned long flags; + unsigned long old_ra_class; + + flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT)); + flags = ra->flags & flags_mask; + + old_ra_class = ra_class_new(ra) << RA_CLASS_SHIFT; + + ra->flags = flags | old_ra_class | ra_class; + + ra_addup_cache_hit(ra); + if (ra_class != RA_CLASS_STATE) + ra->cache_hits <<= 16; + + ra->age = node_readahead_aging(); +} + +/* + * Where is the old read-ahead and look-ahead? + */ +static void ra_set_index(struct file_ra_state *ra, + pgoff_t la_index, pgoff_t ra_index) +{ + ra->la_index = la_index; + ra->ra_index = ra_index; +} + +/* + * Where is the new read-ahead and look-ahead? + */ +static void ra_set_size(struct file_ra_state *ra, + unsigned long ra_size, unsigned long la_size) +{ + /* Disable look-ahead for loopback file. */ + if (unlikely(ra->flags & RA_FLAG_NO_LOOKAHEAD)) + la_size = 0; + + ra->readahead_index = ra->ra_index + ra_size; + ra->lookahead_index = ra->readahead_index - la_size; +} + +/* + * Submit IO for the read-ahead request in file_ra_state. + */ +static int ra_dispatch(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp) +{ + enum ra_class ra_class = ra_class_new(ra); + unsigned long ra_size = ra_readahead_size(ra); + unsigned long la_size = ra_lookahead_size(ra); + pgoff_t eof_index = 1 + DIV_ROUND_UP(i_size_read(mapping->host), + PAGE_CACHE_SIZE); + int actual; + + if (unlikely(ra->ra_index >= eof_index)) + return 0; + + /* Snap to EOF. */ + if (ra->readahead_index + ra_size / 2 > eof_index) { + if (ra_class == RA_CLASS_CONTEXT_AGGRESSIVE && + eof_index > ra->lookahead_index + 1) + la_size = eof_index - ra->lookahead_index; + else + la_size = 0; + ra_size = eof_index - ra->ra_index; + ra_set_size(ra, ra_size, la_size); + ra->flags |= RA_FLAG_EOF; + } + + actual = __do_page_cache_readahead(mapping, filp, + ra->ra_index, ra_size, la_size); + +#ifdef CONFIG_DEBUG_READAHEAD + if (ra->flags & RA_FLAG_MMAP) + ra_account(ra, RA_EVENT_READAHEAD_MMAP, actual); + if (ra->readahead_index == eof_index) + ra_account(ra, RA_EVENT_READAHEAD_EOF, actual); + if (la_size) + ra_account(ra, RA_EVENT_LOOKAHEAD, la_size); + if (ra_size > actual) + ra_account(ra, RA_EVENT_IO_CACHE_HIT, ra_size - actual); + ra_account(ra, RA_EVENT_READAHEAD, actual); + + dprintk("readahead-%s(ino=%lu, index=%lu, ra=%lu+%lu-%lu) = %d\n", + ra_class_name[ra_class], + mapping->host->i_ino, ra->la_index, + ra->ra_index, ra_size, la_size, actual); +#endif /* CONFIG_DEBUG_READAHEAD */ + + return actual; +} + +/* * ra_min is mainly determined by the size of cache memory. Reasonable? * * Table of concrete numbers for 4KB page size: @@ -903,10 +1088,10 @@ static void ra_account(struct file_ra_st return; if (e == RA_EVENT_READAHEAD_HIT && pages < 0) { - c = (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK; + c = ra_class_old(ra); pages = -pages; } else if (ra) - c = ra->flags & RA_CLASS_MASK; + c = ra_class_new(ra); else c = RA_CLASS_NONE; _ Patches currently in -mm which might be from wfg@xxxxxxxxxxxxxxxx are readahead-kconfig-options.patch radixtree-introduce-__radix_tree_lookup_parent.patch radixtree-introduce-radix_tree_scan_hole.patch mm-introduce-probe_pages.patch mm-introduce-pg_readahead.patch readahead-add-look-ahead-support-to-__do_page_cache_readahead.patch readahead-delay-page-release-in-do_generic_mapping_read.patch readahead-insert-cond_resched-calls.patch readahead-minmax_ra_pages.patch readahead-events-accounting.patch readahead-rescue_pages.patch readahead-sysctl-parameters.patch readahead-min-max-sizes.patch readahead-state-based-method-aging-accounting.patch readahead-state-based-method-routines.patch readahead-state-based-method.patch readahead-context-based-method.patch readahead-initial-method-guiding-sizes.patch readahead-initial-method-thrashing-guard-size.patch readahead-initial-method-expected-read-size.patch readahead-initial-method-user-recommended-size.patch readahead-initial-method.patch readahead-backward-prefetching-method.patch readahead-seeking-reads-method.patch readahead-thrashing-recovery-method.patch readahead-call-scheme.patch readahead-laptop-mode.patch readahead-loop-case.patch readahead-nfsd-case.patch readahead-turn-on-by-default.patch readahead-debug-radix-tree-new-functions.patch readahead-debug-traces-showing-accessed-file-names.patch readahead-debug-traces-showing-read-patterns.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html