[PATCH] procfs: expose page cache contents

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Nick White <nwhite@xxxxxxxxxxxx>

This patch adds a /proc/page_cache file to the procfs. The file contains a
line for each inode belonging to a block filesystem which has one or more
of its pages in memory (as determined by PageUptodate). This line includes
a run-length-encoded bitmap of which pages are up-to-date. The runs
alternate, starting with a run of pages that aren't up-to-date (so the
string 0,4,2,3 means no leading pages that aren't up-to-date, then four
pages that are, then two that aren't, and a final three that are).

A sample output (columns are inode, super block id, total number of pages
for this inode, run-length-encoded bitmap of which pages are up-to-date,
total pages up-to-date, total pages not up-to-date):

148073	sda1	1	0,1	1	0
397793	sda1	4518	0,3,2,1,67,32,231,1,29,2,396,9,32,1,12,2,2613,1,1084	52	4466
133941	sda1	13	0,4,3,3,3	7	6
173947	sda1	43	0,5,2,1,1,4,1,1,2,1,8,6,1,10	28	15
148499	sda1	4	0,4	4	0

It's currently possible to query this information for a specific file from
userspace using mmap / mincore system calls, however this patch solves the
slightly different question "What's in the cache?" (not "Is this in the
cache?").

The patch is currently missing updates to the procfs documentation, but
I'd appreciate any other comments. Thanks -


Nick

Signed-off-by: Nick White <nwhite@xxxxxxxxxxxx>
---
 fs/proc/Makefile     |    1 +
 fs/proc/internal.h   |    1 +
 fs/proc/page_cache.c |  269
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 271 insertions(+)

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 712f24d..69cbed8 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -17,6 +17,7 @@ proc-y	+= devices.o
 proc-y	+= interrupts.o
 proc-y	+= loadavg.o
 proc-y	+= meminfo.o
+proc-y	+= page_cache.o
 proc-y	+= stat.o
 proc-y	+= uptime.o
 proc-y	+= version.o
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 85ff3a4..522beea 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -159,6 +159,7 @@ struct pde_opener {
 void pde_users_dec(struct proc_dir_entry *pde);
 
 extern spinlock_t proc_subdir_lock;
+extern spinlock_t inode_sb_list_lock;
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry,
unsigned int);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t
filldir);
diff --git a/fs/proc/page_cache.c b/fs/proc/page_cache.c
new file mode 100644
index 0000000..2b709b9
--- /dev/null
+++ b/fs/proc/page_cache.c
@@ -0,0 +1,269 @@
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/page-flags.h>
+#include <linux/pagevec.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Iterator state for the /proc/page_cache seq_file walk: one position in a
+ * list of super blocks and one position in a list of inodes.
+ */
+struct page_cache_proc_iter {
+	/* list node converted to a struct super_block via its s_list member */
+	struct list_head *sb_ptr;
+	/* list node converted to a struct inode via its i_sb_list member */
+	struct list_head *inode_ptr;
+};
+
+/*
+ * Map the iterator's list positions back to the objects that embed them.
+ * The expansions are plain expressions (no trailing semicolon) and the
+ * argument is parenthesized, so they can be used anywhere an expression can.
+ */
+#define GET_SB(iter) \
+	list_entry((iter)->sb_ptr, struct super_block, s_list)
+#define GET_INODE(iter) \
+	list_entry((iter)->inode_ptr, struct inode, i_sb_list)
+
+/*
+ * Fold @count pages of kind @bit (true = up-to-date) into the run that is
+ * currently being accumulated. A finished run is written out, preceded by a
+ * comma, only when a page of the other kind ends it.
+ */
+static void page_cache_rle_add(struct seq_file *s, bool *ones,
+			       unsigned long *run_length, bool bit,
+			       unsigned long count)
+{
+	if (!count)
+		return;
+
+	if (*ones != bit) {
+		seq_printf(s, ",%lu", *run_length);
+		*ones = bit;
+		*run_length = 0;
+	}
+	*run_length += count;
+}
+
+/*
+ * Prints one line for a single inode: inode number, super block id, page
+ * count derived from i_size, the RLE-ed bitmap (comma separated, starting
+ * with the run of pages that are not up-to-date), then the totals of
+ * up-to-date and not-up-to-date pages. Pages missing from the mapping are
+ * counted as not up-to-date. An inode without any up-to-date page prints
+ * nothing.
+ *
+ * Returns 0, or -EINVAL when no iterator state was passed in.
+ */
+static int proc_page_cache_seq_show(struct seq_file *s, void *v)
+{
+	struct page_cache_proc_iter *state = v;
+	struct inode *inode;
+	struct super_block *sb;
+	struct pagevec pvec;
+	/* next: index of the first page not yet accounted for in the bitmap */
+	unsigned long i, nr_read, next = 0, nr_uptodate = 0, nr_pages,
+		run_length = 0;
+	bool is_printing = false, ones = false;
+
+	if (!state)
+		return -EINVAL;
+	inode = GET_INODE(state);
+	sb = GET_SB(state);
+
+	/*
+	 * NOTE(review): nothing visible here pins the inode or its super
+	 * block between ->next() and ->show(); confirm neither can be freed
+	 * in the meantime.
+	 */
+	spin_lock(&inode->i_lock);
+
+	if (!inode->i_mapping)
+		goto end;
+
+	nr_pages = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+
+	pagevec_init(&pvec, 0);
+
+	/* the lookup hands back at most PAGEVEC_SIZE pages per call */
+	while ((nr_read = pagevec_lookup(&pvec, inode->i_mapping, next,
+					 PAGEVEC_SIZE))) {
+		for (i = 0; i < nr_read; i++) {
+			struct page *page = pvec.pages[i];
+			unsigned long pgidx = page->index;
+			bool uptodate = PageUptodate(page);
+
+			if (is_printing) {
+				/* indexes skipped by the lookup are zeros */
+				if (pgidx > next)
+					page_cache_rle_add(s, &ones,
+							   &run_length, false,
+							   pgidx - next);
+				page_cache_rle_add(s, &ones, &run_length,
+						   uptodate, 1);
+			} else if (uptodate) {
+				/*
+				 * First one of this inode: every page in
+				 * front of it is a leading zero, so the
+				 * leading run is exactly page->index long.
+				 */
+				seq_printf(
+					s,
+					"%lu\t%s\t%lu\t%lu",
+					inode->i_ino,
+					sb->s_id,
+					nr_pages,
+					pgidx);
+				is_printing = true;
+				ones = true;
+				run_length = 1;
+			}
+
+			if (uptodate)
+				++nr_uptodate;
+
+			next = pgidx + 1;
+		}
+
+		pagevec_release(&pvec);
+	}
+
+	if (is_printing) {
+		/* pages between the last one seen and the end are zeros */
+		if (nr_pages > next)
+			page_cache_rle_add(s, &ones, &run_length, false,
+					   nr_pages - next);
+		/* print the last run */
+		seq_printf(s, ",%lu", run_length);
+		seq_printf(s, "\t%lu\t%lu\n",
+			nr_uptodate,
+			nr_uptodate > nr_pages ? 0 : nr_pages - nr_uptodate);
+	}
+
+end:
+	spin_unlock(&inode->i_lock);
+	return 0;
+}
+
+/*
+ * Advance the iterator @v to the next inode: first within the current super
+ * block, then to the first inode of the following super blocks whose
+ * filesystem type has FS_REQUIRES_DEV set, skipping those with an empty
+ * inode list.
+ *
+ * Returns the iterator, or NULL once the super block list is exhausted. The
+ * iterator is freed before NULL is returned: seq_file hands the value
+ * returned from here to ->stop(), so a NULL return would otherwise lose the
+ * only reference to it.
+ *
+ * NOTE(review): the super block and inode list positions are kept after the
+ * corresponding locks are dropped and no reference is taken; confirm the
+ * objects cannot go away between calls.
+ */
+static void *proc_page_cache_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct page_cache_proc_iter *state = v;
+	struct super_block *sb;
+	loff_t ignore = 0;
+
+	if (!state)
+		return NULL;
+
+	if (!state->sb_ptr)
+		goto done;
+	sb = GET_SB(state);
+
+	/* first, try the next inode of this SB (this also bumps *pos) */
+	spin_lock(&inode_sb_list_lock);
+	state->inode_ptr = seq_list_next(state->inode_ptr, &sb->s_inodes, pos);
+	spin_unlock(&inode_sb_list_lock);
+	if (state->inode_ptr)
+		return state;
+
+	/* second, try the first inode of each following block-backed SB */
+	for (;;) {
+		spin_lock(&sb_lock);
+		for (;;) {
+			state->sb_ptr = seq_list_next(
+				state->sb_ptr,
+				&super_blocks,
+				&ignore);
+			if (!state->sb_ptr) {
+				/* we've passed the last SB */
+				spin_unlock(&sb_lock);
+				goto done;
+			}
+			sb = GET_SB(state);
+			if (sb->s_type->fs_flags & FS_REQUIRES_DEV)
+				break;
+		}
+		spin_unlock(&sb_lock);
+
+		spin_lock(&inode_sb_list_lock);
+		state->inode_ptr = seq_list_start(&sb->s_inodes, 0);
+		spin_unlock(&inode_sb_list_lock);
+		if (state->inode_ptr)
+			return state;
+		/* this SB has no inodes; keep looking instead of stopping */
+	}
+
+done:
+	kfree(state);
+	return NULL;
+}
+
+/*
+ * Allocate an iterator, place it on the first inode of the first super block
+ * whose filesystem type has FS_REQUIRES_DEV set and whose inode list is not
+ * empty, then step it forward *pos times.
+ *
+ * Returns the iterator, or NULL when there is nothing to show. NULL is also
+ * returned when the allocation fails, because ->stop() passes whatever is
+ * returned here straight to kfree() and so could not cope with an ERR_PTR.
+ */
+static void *proc_page_cache_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct page_cache_proc_iter *state;
+	struct super_block *sb = NULL;
+	loff_t ff = *pos, ignore = 0;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	spin_lock(&sb_lock);
+	state->sb_ptr = seq_list_start(&super_blocks, 0);
+	for (;;) {
+		/* skip super blocks that are not backed by a device */
+		while (state->sb_ptr) {
+			sb = GET_SB(state);
+			if (sb->s_type->fs_flags & FS_REQUIRES_DEV)
+				break;
+			state->sb_ptr = seq_list_next(
+				state->sb_ptr,
+				&super_blocks,
+				&ignore);
+		}
+		spin_unlock(&sb_lock);
+		if (!state->sb_ptr)
+			goto fail;
+
+		spin_lock(&inode_sb_list_lock);
+		state->inode_ptr = seq_list_start(&sb->s_inodes, 0);
+		spin_unlock(&inode_sb_list_lock);
+		if (state->inode_ptr)
+			break;
+
+		/* no inodes on this SB; move on to the one after it */
+		spin_lock(&sb_lock);
+		state->sb_ptr = seq_list_next(
+			state->sb_ptr,
+			&super_blocks,
+			&ignore);
+	}
+
+	/* fast-forward to the position the previous read stopped at */
+	while (ff-- > 0 && state)
+		state = proc_page_cache_seq_next(s, state, &ignore);
+
+	return state;
+
+fail:
+	kfree(state);
+	return NULL;
+}
+
+/* End of a seq_file pass: free the iterator handed in as @v. */
+static void proc_page_cache_seq_stop(struct seq_file *s, void *v)
+{
+	struct page_cache_proc_iter *state = v;
+
+	kfree(state);
+}
+
+/* seq_file iterator callbacks used for /proc/page_cache */
+static const struct seq_operations proc_page_cache_seq_ops = {
+	.start	= proc_page_cache_seq_start,
+	.stop	= proc_page_cache_seq_stop,
+	.next	= proc_page_cache_seq_next,
+	.show	= proc_page_cache_seq_show,
+};
+
+/* open() handler: set @file up as a seq_file driven by the ops above. */
+static int proc_page_cache_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *ops = &proc_page_cache_seq_ops;
+
+	return seq_open(file, ops);
+}
+
+/* file operations of /proc/page_cache: everything but open is seq_file's */
+static const struct file_operations proc_page_cache_file_ops = {
+	.open		= proc_page_cache_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/* Exit hook: removes the "page_cache" entry from the procfs root. */
+static void __exit proc_page_cache_exit(void)
+{
+	remove_proc_entry("page_cache", NULL);
+}
+module_exit(proc_page_cache_exit);
+
+/*
+ * Init hook: creates the "page_cache" entry in the procfs root.
+ * Returns 0 on success, -ENOMEM if the entry could not be created.
+ */
+static int __init proc_page_cache_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = proc_create("page_cache", 0, NULL, &proc_page_cache_file_ops);
+
+	return entry ? 0 : -ENOMEM;
+}
+module_init(proc_page_cache_init);
+

Attachment: smime.p7s
Description: S/MIME cryptographic signature


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux