[PATCH 2/4] fs/proc/page.c: introduce /proc/kpagecache interface

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



/proc/pid/pagemap is one of the most powerful features for analyzing and
testing page mappings. It is also useful for checking page status when combined
with /proc/kpageflags or /proc/kpagecount. What is missing is a similar
interface for scanning the page cache of a given file without opening it or
mapping it into a virtual address space, either of which could impact other
workloads. This patch provides such an interface.

Usage is simple: 1) write the path of the file to be scanned into the
interface, and 2) read 64-bit entries, each of which describes the page at
the corresponding page index.

A good in-tree example is tools/vm/page-types.c (support for this interface
is added to it in a later patch).

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
 fs/proc/page.c     | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |   9 +++--
 2 files changed, 111 insertions(+), 3 deletions(-)

diff --git v3.15-rc5.orig/fs/proc/page.c v3.15-rc5/fs/proc/page.c
index e647c55275d9..d6fe458016e0 100644
--- v3.15-rc5.orig/fs/proc/page.c
+++ v3.15-rc5/fs/proc/page.c
@@ -9,6 +9,8 @@
 #include <linux/seq_file.h>
 #include <linux/hugetlb.h>
 #include <linux/kernel-page-flags.h>
+#include <linux/path.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -212,10 +214,113 @@ static const struct file_operations proc_kpageflags_operations = {
 	.read = kpageflags_read,
 };
 
+static struct path kpagecache_path;	/* file to scan; NOTE(review): no locking */
+
+/* u64 entry: tags on top, PFN at the bottom, a few unused bits in between. */
+#define KPC_TAGS_BITS	__NR_PAGECACHE_TAGS
+#define KPC_TAGS_OFFSET	(64 - KPC_TAGS_BITS)
+#define KPC_TAGS_MASK	(((1ULL << KPC_TAGS_BITS) - 1) << KPC_TAGS_OFFSET)
+#define KPC_TAGS(bits)	(((u64)(bits) << KPC_TAGS_OFFSET) & KPC_TAGS_MASK)
+#define KPC_PFN_BITS	(64 - PAGE_CACHE_SHIFT)
+#define KPC_PFN_MASK	((1ULL << KPC_PFN_BITS) - 1)
+#define KPC_PFN(pfn)	((pfn) & KPC_PFN_MASK)
+
+/* Pack the radix-tree tags set on @index of @root into the KPC_TAGS field. */
+static u64 get_pagecache_tags(struct radix_tree_root *root, unsigned long index)
+{
+	u64 tags = 0;	/* 64-bit: KPC_TAGS() shifts this up to the top bits */
+	int i;
+	for (i = 0; i < __NR_PAGECACHE_TAGS; i++)
+		tags |= (u64)!!radix_tree_tag_get(root, index, i) << i;
+	return KPC_TAGS(tags);
+}
+
+/* Emit one u64 (tags | PFN) per cached page of the currently selected file. */
+static ssize_t kpagecache_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	u64 __user *out = (u64 __user *)buf;
+	pgoff_t index = (unsigned long)*ppos / KPMSIZE;
+	struct address_space *mapping;
+	loff_t size;
+	struct radix_tree_iter iter;
+	void **slot;
+	ssize_t ret = 0;
+
+	if (!kpagecache_path.dentry)
+		return 0;
+	if (*ppos & KPMMASK || count & KPMMASK)
+		return -EINVAL;
+	mapping = kpagecache_path.dentry->d_inode->i_mapping;
+	size = i_size_read(mapping->host);
+	size = (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* in pages */
+	if (!count || index >= size)	/* nothing to scan; avoids end underflow */
+		return 0;
+	count = min_t(loff_t, count, (size - index) * KPMSIZE);
+
+	rcu_read_lock();	/* NOTE(review): put_user() below may sleep */
+	radix_tree_for_each_slot(slot, &mapping->page_tree,
+				 &iter, index, index + count / KPMSIZE - 1) {
+		struct page *page = radix_tree_deref_slot(slot);
+		u64 entry;
+		if (unlikely(!page || radix_tree_exception(page)))
+			continue;	/* hole, or an entry that is not a page */
+		entry = get_pagecache_tags(&mapping->page_tree, iter.index);
+		entry |= KPC_PFN(page_to_pfn(page));
+		ret = put_user(entry, out + iter.index - index);
+		if (ret)
+			break;
+		count = (iter.index - index + 1) * KPMSIZE; /* bytes so far */
+	}
+	rcu_read_unlock();
+	if (ret)
+		return ret;	/* -EFAULT: do not advance *ppos */
+	*ppos += count;
+	return count;
+}
+
+/* Select the file to scan from a user-supplied path; a NULL buffer resets it. */
+static ssize_t kpagecache_write(struct file *file, const char __user *pathname,
+			       size_t count, loff_t *ppos)
+{
+	struct path path;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!pathname) {
+		if (kpagecache_path.dentry) {
+			path_put(&kpagecache_path);
+			kpagecache_path.mnt = NULL;
+			kpagecache_path.dentry = NULL;
+		}
+		return count;
+	}
+
+	err = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path);
+	if (err)
+		return err;	/* report the real lookup failure */
+	if (kpagecache_path.dentry != path.dentry) {
+		path_put(&kpagecache_path);	/* drop the previous target */
+		kpagecache_path = path;		/* keep the new reference */
+	} else
+		path_put(&path);
+	return count;
+}
+
+static const struct file_operations proc_kpagecache_operations = {
+	.llseek		= mem_lseek,
+	.read		= kpagecache_read,	/* dump entries of the target file */
+	.write		= kpagecache_write,	/* choose the target file by path */
+};
+
 static int __init proc_page_init(void)
 {
 	proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
 	proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
+	proc_create("kpagecache", S_IRUSR|S_IWUSR, NULL,
+			&proc_kpagecache_operations);	/* owner read/write only */
 	return 0;
 }
 fs_initcall(proc_page_init);
diff --git v3.15-rc5.orig/include/linux/fs.h v3.15-rc5/include/linux/fs.h
index 878031227c57..5b489df9d964 100644
--- v3.15-rc5.orig/include/linux/fs.h
+++ v3.15-rc5/include/linux/fs.h
@@ -447,9 +447,12 @@ struct block_device {
  * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
  * radix trees
  */
-#define PAGECACHE_TAG_DIRTY	0
-#define PAGECACHE_TAG_WRITEBACK	1
-#define PAGECACHE_TAG_TOWRITE	2
+enum {
+	PAGECACHE_TAG_DIRTY,
+	PAGECACHE_TAG_WRITEBACK,
+	PAGECACHE_TAG_TOWRITE,
+	__NR_PAGECACHE_TAGS,	/* number of tags above; must stay last */
+};
 
 int mapping_tagged(struct address_space *mapping, int tag);
 
-- 
1.9.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]