From: Nick White <nwhite@xxxxxxxxxxxx>

This patch adds a /proc/page_cache file to procfs. The file contains one
line for each inode belonging to a block filesystem that has one or more of
its pages in memory (as determined by PageUptodate). Each line includes a
run-length-encoded bitmap of which pages are up-to-date, starting with a run
of pages that aren't (so the string 0,4,2,3 means the first four pages are
up-to-date, the next two aren't and the final three are).

A sample output (columns are: inode, super block id, total number of pages
for this inode, run-length-encoded bitmap of which pages are up-to-date,
total pages up-to-date, total pages not up-to-date):

148073 sda1 1 0,1 1 0
397793 sda1 4518 0,3,2,1,67,32,231,1,29,2,396,9,32,1,12,2,2613,1,1084 52 4466
133941 sda1 13 0,4,3,3,3 7 6
173947 sda1 43 0,5,2,1,1,4,1,1,2,1,8,6,1,10 28 15
148499 sda1 4 0,4 4 0

It's currently possible to query this information for a specific file from
userspace using the mmap / mincore system calls; however, this patch answers
the slightly different question "What's in the cache?" (not "Is this in the
cache?").

The patch is currently missing updates to the procfs documentation, but I'd
appreciate any other comments.
Thanks - Nick

Signed-off-by: Nick White <nwhite@xxxxxxxxxxxx>
---
 fs/proc/Makefile     |   1 +
 fs/proc/internal.h   |   1 +
 fs/proc/page_cache.c | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 268 insertions(+)

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 712f24d..69cbed8 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -17,6 +17,7 @@ proc-y	+= devices.o
 proc-y	+= interrupts.o
 proc-y	+= loadavg.o
 proc-y	+= meminfo.o
+proc-y	+= page_cache.o
 proc-y	+= stat.o
 proc-y	+= uptime.o
 proc-y	+= version.o
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 85ff3a4..522beea 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -159,6 +159,7 @@ struct pde_opener {
 void pde_users_dec(struct proc_dir_entry *pde);
 
 extern spinlock_t proc_subdir_lock;
+extern spinlock_t inode_sb_list_lock;
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
diff --git a/fs/proc/page_cache.c b/fs/proc/page_cache.c
new file mode 100644
index 0000000..2b709b9
--- /dev/null
+++ b/fs/proc/page_cache.c
@@ -0,0 +1,266 @@
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/page-flags.h>
+#include <linux/pagevec.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Iteration cursor: the current superblock's node in super_blocks and the
+ * current inode's node in that superblock's s_inodes list.
+ *
+ * NOTE(review): the cursor keeps raw list pointers and takes no reference on
+ * the superblock or inode between calls - confirm neither can be freed while
+ * a reader is positioned on it.
+ */
+struct page_cache_proc_iter {
+	struct list_head *sb_ptr;
+	struct list_head *inode_ptr;
+};
+
+#define GET_SB(iter) \
+	list_entry((iter)->sb_ptr, struct super_block, s_list)
+#define GET_INODE(iter) \
+	list_entry((iter)->inode_ptr, struct inode, i_sb_list)
+
+/* run-length encoder state for one inode's bitmap */
+struct page_cache_rle {
+	struct seq_file *s;
+	bool ones;			/* value of the run being built */
+	unsigned long run_length;	/* pages in that run so far */
+};
+
+/*
+ * accounts for count consecutive pages whose up-to-date state is ones; the
+ * run built so far is printed when the state flips.
+ */
+static void page_cache_rle_add(struct page_cache_rle *rle, bool ones,
+			       unsigned long count)
+{
+	if (!count)
+		return;
+
+	if (ones != rle->ones) {
+		seq_printf(rle->s, ",%lu", rle->run_length);
+		rle->ones = ones;
+		rle->run_length = 0;
+	}
+	rle->run_length += count;
+}
+
+/*
+ * prints RLE-ed bitmap for a single inode, starting with zeros & separated
+ * by commas. Pages missing from the page cache count as zeros, and inodes
+ * without a single up-to-date page print nothing.
+ */
+static int proc_page_cache_seq_show(struct seq_file *s, void *v)
+{
+	struct page_cache_proc_iter *state = v;
+	struct page_cache_rle rle = { .s = s };
+	struct inode *inode;
+	struct super_block *sb;
+	struct pagevec pvec;
+	unsigned long i, nr_read, nr_pages;
+	unsigned long next = 0;	/* index after the last page seen */
+	unsigned long nr_uptodate = 0;
+	bool is_printing = false;
+
+	if (!state)
+		return -EINVAL;
+	inode = GET_INODE(state);
+	sb = GET_SB(state);
+
+	spin_lock(&inode->i_lock);
+
+	if (!inode->i_mapping)
+		goto end;
+
+	nr_pages = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+
+	pagevec_init(&pvec, 0);
+
+	/* we have to process in ~16 page chunks */
+	while ((nr_read = pagevec_lookup(&pvec, inode->i_mapping, next,
+					 PAGEVEC_SIZE))) {
+		for (i = 0; i < nr_read; i++) {
+			struct page *page = pvec.pages[i];
+			bool uptodate = PageUptodate(page);
+			/* the gap is how many pages we've skipped */
+			unsigned long gap =
+				page->index > next ? page->index - next : 0;
+
+			next = page->index + 1;
+
+			if (!is_printing) {
+				if (!uptodate)
+					continue;
+				/* page->index leading zeros (this is a one) */
+				seq_printf(s, "%lu\t%s\t%lu\t%lu",
+					   inode->i_ino, sb->s_id, nr_pages,
+					   page->index);
+				is_printing = true;
+				rle.ones = true;
+				gap = 0;
+			}
+
+			page_cache_rle_add(&rle, false, gap);
+			page_cache_rle_add(&rle, uptodate, 1);
+			if (uptodate)
+				++nr_uptodate;
+		}
+
+		pagevec_release(&pvec);
+	}
+
+	if (is_printing) {
+		/* zeros between the last page we processed and the end */
+		if (nr_pages > next)
+			page_cache_rle_add(&rle, false, nr_pages - next);
+		/* print the last run, then the totals */
+		seq_printf(s, ",%lu\t%lu\t%lu\n",
+			   rle.run_length,
+			   nr_uptodate,
+			   nr_uptodate > nr_pages ? 0 : nr_pages - nr_uptodate);
+	}
+
+end:
+	spin_unlock(&inode->i_lock);
+	return 0;
+}
+
+/*
+ * starting at state->sb_ptr, finds the first SB of a FS_REQUIRES_DEV
+ * filesystem that has an inode and points the cursor at its first inode.
+ * Returns false once the SB list is exhausted.
+ */
+static bool page_cache_seek_inode(struct page_cache_proc_iter *state)
+{
+	loff_t ignore = 0;
+
+	while (state->sb_ptr) {
+		struct super_block *sb = GET_SB(state);
+
+		if (sb->s_type->fs_flags & FS_REQUIRES_DEV) {
+			spin_lock(&inode_sb_list_lock);
+			state->inode_ptr = seq_list_start(&sb->s_inodes, 0);
+			spin_unlock(&inode_sb_list_lock);
+			if (state->inode_ptr)
+				return true;
+		}
+
+		spin_lock(&sb_lock);
+		state->sb_ptr = seq_list_next(state->sb_ptr, &super_blocks,
+					      &ignore);
+		spin_unlock(&sb_lock);
+	}
+
+	return false;
+}
+
+/*
+ * advances the cursor by one inode, moving on to the following SBs when the
+ * current one runs out. Frees the cursor and returns NULL at the end, as
+ * ->stop() is then handed NULL rather than the cursor.
+ */
+static void *proc_page_cache_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct page_cache_proc_iter *state = v;
+	struct super_block *sb;
+	loff_t ignore = 0;
+
+	if (!state)
+		return NULL;
+
+	sb = GET_SB(state);
+
+	/* first, try the next inode of this SB */
+	spin_lock(&inode_sb_list_lock);
+	state->inode_ptr = seq_list_next(state->inode_ptr, &sb->s_inodes, pos);
+	spin_unlock(&inode_sb_list_lock);
+	if (state->inode_ptr)
+		return state;
+
+	/* second, try the first inode of a following SB */
+	spin_lock(&sb_lock);
+	state->sb_ptr = seq_list_next(state->sb_ptr, &super_blocks, &ignore);
+	spin_unlock(&sb_lock);
+	if (page_cache_seek_inode(state))
+		return state;
+
+	/* we've passed the last inode of the last SB */
+	kfree(state);
+	return NULL;
+}
+
+/*
+ * allocates a cursor and fast-forwards it to record *pos. Returns NULL if
+ * there is no such record and ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static void *proc_page_cache_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct page_cache_proc_iter *state;
+	loff_t ff = *pos, ignore = 0;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&sb_lock);
+	state->sb_ptr = seq_list_start(&super_blocks, 0);
+	spin_unlock(&sb_lock);
+	if (!page_cache_seek_inode(state)) {
+		kfree(state);
+		return NULL;
+	}
+
+	while (ff-- > 0 && state)
+		state = proc_page_cache_seq_next(s, state, &ignore);
+
+	return state;
+}
+
+static void proc_page_cache_seq_stop(struct seq_file *s, void *v)
+{
+	/* v is NULL at list end, or an ERR_PTR if ->start() failed */
+	if (!IS_ERR_OR_NULL(v))
+		kfree(v);
+}
+
+static const struct seq_operations proc_page_cache_seq_ops = {
+	.start = proc_page_cache_seq_start,
+	.next = proc_page_cache_seq_next,
+	.stop = proc_page_cache_seq_stop,
+	.show = proc_page_cache_seq_show,
+};
+
+static int proc_page_cache_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &proc_page_cache_seq_ops);
+}
+
+static const struct file_operations proc_page_cache_file_ops = {
+	.open = proc_page_cache_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static void __exit proc_page_cache_exit(void)
+{
+	remove_proc_entry("page_cache", NULL);
+}
+module_exit(proc_page_cache_exit);
+
+static int __init proc_page_cache_init(void)
+{
+	if (proc_create("page_cache", 0, NULL, &proc_page_cache_file_ops))
+		return 0;
+	else
+		return -ENOMEM;
+}
+module_init(proc_page_cache_init);
Attachment:
smime.p7s
Description: S/MIME cryptographic signature