The accounting code is compiled in only when CONFIG_READAHEAD_STATS=y (the
Kconfig default is n), and even when compiled in it remains inactive by
default. It can be enabled/disabled at runtime through the debugfs interface
(the paths below assume debugfs is mounted on /debug; with the conventional
mount point the files live under /sys/kernel/debug/readahead/):

	echo 1 > /debug/readahead/stats_enable
	echo 0 > /debug/readahead/stats_enable

Example output:
(taken from a freshly booted NFS-ROOT console box with rsize=524288)

$ cat /debug/readahead/stats
pattern     readahead    eof_hit  cache_hit         io    sync_io    mmap_io    meta_io       size async_size    io_size
initial           702        511          0        692        692          0          0          2          0          2
subsequent          7          0          1          7          1          1          0         23         22         23
context           160        161          0          2          0          1          0          0          0         16
around            184        184        177        184        184        184          0         58          0         53
backwards           2          0          2          2          2          0          0          4          0          3
fadvise          2593         47          8       2588       2588          0          0          1          0          1
oversize            0          0          0          0          0          0          0          0          0          0
random             45         20          0         44         44          0          0          1          0          1
all              3697        923        188       3519       3511        186          0          4          0          4

The two most important columns are
- io		number of readahead IO
- io_size	average readahead IO size

CC: Ingo Molnar <mingo@xxxxxxx>
CC: Jens Axboe <axboe@xxxxxxxxx>
CC: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Acked-by: Rik van Riel <riel@xxxxxxxxxx>
Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx>
---
 mm/Kconfig     |   15 +++
 mm/readahead.c |  202 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 217 insertions(+)

--- linux-next.orig/mm/readahead.c 2012-01-25 15:57:52.000000000 +0800 +++ linux-next/mm/readahead.c 2012-01-25 15:57:53.000000000 +0800 @@ -33,6 +33,202 @@ EXPORT_SYMBOL_GPL(file_ra_state_init); #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) +#ifdef CONFIG_READAHEAD_STATS +#include <linux/ftrace_event.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> + +static u32 readahead_stats_enable __read_mostly; + +static const struct trace_print_flags ra_pattern_names[] = { + READAHEAD_PATTERNS +}; + +enum ra_account { + /* number of readaheads */ + RA_ACCOUNT_COUNT, /* readahead request */ + RA_ACCOUNT_EOF, /* readahead request covers EOF */ + RA_ACCOUNT_CACHE_HIT, /* readahead request covers some cached pages */ + RA_ACCOUNT_IOCOUNT, /* readahead IO */ + 
RA_ACCOUNT_SYNC, /* readahead IO that is synchronous */ + RA_ACCOUNT_MMAP, /* readahead IO by mmap page faults */ + RA_ACCOUNT_METADATA, /* readahead IO on metadata */ + /* number of readahead pages */ + RA_ACCOUNT_SIZE, /* readahead size */ + RA_ACCOUNT_ASYNC_SIZE, /* readahead async size */ + RA_ACCOUNT_ACTUAL, /* readahead actual IO size */ + /* end mark */ + RA_ACCOUNT_MAX, +}; + +#define RA_STAT_BATCH (INT_MAX / 2) +static struct percpu_counter ra_stat[RA_PATTERN_ALL][RA_ACCOUNT_MAX]; + +static inline void add_ra_stat(int i, int j, s64 amount) +{ + __percpu_counter_add(&ra_stat[i][j], amount, RA_STAT_BATCH); +} + +static inline void inc_ra_stat(int i, int j) +{ + add_ra_stat(i, j, 1); +} + +static void readahead_stats(struct address_space *mapping, + pgoff_t offset, + unsigned long req_size, + bool for_mmap, + bool for_metadata, + enum readahead_pattern pattern, + pgoff_t start, + unsigned long size, + unsigned long async_size, + int actual) +{ + pgoff_t eof = ((i_size_read(mapping->host)-1) >> PAGE_CACHE_SHIFT) + 1; + + inc_ra_stat(pattern, RA_ACCOUNT_COUNT); + add_ra_stat(pattern, RA_ACCOUNT_SIZE, size); + add_ra_stat(pattern, RA_ACCOUNT_ASYNC_SIZE, async_size); + add_ra_stat(pattern, RA_ACCOUNT_ACTUAL, actual); + + if (start + size >= eof) + inc_ra_stat(pattern, RA_ACCOUNT_EOF); + if (actual < size) + inc_ra_stat(pattern, RA_ACCOUNT_CACHE_HIT); + + if (actual) { + inc_ra_stat(pattern, RA_ACCOUNT_IOCOUNT); + + if (start <= offset && offset < start + size) + inc_ra_stat(pattern, RA_ACCOUNT_SYNC); + + if (for_mmap) + inc_ra_stat(pattern, RA_ACCOUNT_MMAP); + if (for_metadata) + inc_ra_stat(pattern, RA_ACCOUNT_METADATA); + } +} + +static void readahead_stats_reset(void) +{ + int i, j; + + for (i = 0; i < RA_PATTERN_ALL; i++) + for (j = 0; j < RA_ACCOUNT_MAX; j++) + percpu_counter_set(&ra_stat[i][j], 0); +} + +static void +readahead_stats_sum(long long ra_stats[RA_PATTERN_MAX][RA_ACCOUNT_MAX]) +{ + int i, j; + + for (i = 0; i < RA_PATTERN_ALL; i++) + for (j = 0; 
j < RA_ACCOUNT_MAX; j++) { + s64 n = percpu_counter_sum(&ra_stat[i][j]); + ra_stats[i][j] += n; + ra_stats[RA_PATTERN_ALL][j] += n; + } +} + +static int readahead_stats_show(struct seq_file *s, void *_) +{ + long long ra_stats[RA_PATTERN_MAX][RA_ACCOUNT_MAX]; + int i; + + seq_printf(s, + "%-10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", + "pattern", "readahead", "eof_hit", "cache_hit", + "io", "sync_io", "mmap_io", "meta_io", + "size", "async_size", "io_size"); + + memset(ra_stats, 0, sizeof(ra_stats)); + readahead_stats_sum(ra_stats); + + for (i = 0; i < RA_PATTERN_MAX; i++) { + unsigned long count = ra_stats[i][RA_ACCOUNT_COUNT]; + unsigned long iocount = ra_stats[i][RA_ACCOUNT_IOCOUNT]; + /* + * avoid division-by-zero + */ + if (count == 0) + count = 1; + if (iocount == 0) + iocount = 1; + + seq_printf(s, "%-10s %10lld %10lld %10lld %10lld %10lld " + "%10lld %10lld %10lld %10lld %10lld\n", + ra_pattern_names[i].name, + ra_stats[i][RA_ACCOUNT_COUNT], + ra_stats[i][RA_ACCOUNT_EOF], + ra_stats[i][RA_ACCOUNT_CACHE_HIT], + ra_stats[i][RA_ACCOUNT_IOCOUNT], + ra_stats[i][RA_ACCOUNT_SYNC], + ra_stats[i][RA_ACCOUNT_MMAP], + ra_stats[i][RA_ACCOUNT_METADATA], + ra_stats[i][RA_ACCOUNT_SIZE] / count, + ra_stats[i][RA_ACCOUNT_ASYNC_SIZE] / count, + ra_stats[i][RA_ACCOUNT_ACTUAL] / iocount); + } + + return 0; +} + +static int readahead_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, readahead_stats_show, NULL); +} + +static ssize_t readahead_stats_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + readahead_stats_reset(); + return size; +} + +static const struct file_operations readahead_stats_fops = { + .owner = THIS_MODULE, + .open = readahead_stats_open, + .write = readahead_stats_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init readahead_create_debugfs(void) +{ + struct dentry *root; + struct dentry *entry; + int i, j; + + root = 
debugfs_create_dir("readahead", NULL); + if (!root) + goto out; + + entry = debugfs_create_file("stats", 0644, root, + NULL, &readahead_stats_fops); + if (!entry) + goto out; + + entry = debugfs_create_bool("stats_enable", 0644, root, + &readahead_stats_enable); + if (!entry) + goto out; + + for (i = 0; i < RA_PATTERN_ALL; i++) + for (j = 0; j < RA_ACCOUNT_MAX; j++) + percpu_counter_init(&ra_stat[i][j], 0); + + return 0; +out: + printk(KERN_ERR "readahead: failed to create debugfs entries\n"); + return -ENOMEM; +} + +late_initcall(readahead_create_debugfs); +#endif + static inline void readahead_event(struct address_space *mapping, pgoff_t offset, unsigned long req_size, @@ -44,6 +240,12 @@ static inline void readahead_event(struc unsigned long async_size, int actual) { +#ifdef CONFIG_READAHEAD_STATS + if (readahead_stats_enable) + readahead_stats(mapping, offset, req_size, + for_mmap, for_metadata, + pattern, start, size, async_size, actual); +#endif trace_readahead(mapping, offset, req_size, pattern, start, size, async_size, actual); } --- linux-next.orig/mm/Kconfig 2012-01-25 15:57:46.000000000 +0800 +++ linux-next/mm/Kconfig 2012-01-25 15:57:53.000000000 +0800 @@ -379,3 +379,18 @@ config CLEANCACHE in a negligible performance hit. If unsure, say Y to enable cleancache + +config READAHEAD_STATS + bool "Collect page cache readahead stats" + depends on DEBUG_FS + default n + help + This provides the readahead events accounting facilities. + + To do readahead accounting for a workload: + + echo 1 > /sys/kernel/debug/readahead/stats_enable + echo 0 > /sys/kernel/debug/readahead/stats # reset counters + # run the workload + cat /sys/kernel/debug/readahead/stats # check counters + echo 0 > /sys/kernel/debug/readahead/stats_enable -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html