[PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like relclaim but is lighter than reclaim. it only reuses clean and zero mapcount pages of mapping. for special filesystems using this feature like below:

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: z00281421 <z00281421@xxxxxxxxxxxxxxxxxxxx>

const struct address_space_operations special_aops = {
    ...
	.reuse_mapping_page = generic_reuse_mapping_page,
}

Signed-off-by: z00281421 <z00281421@xxxxxxxxxxxxxxxxxxxx>
---
 fs/buffer.c                   |    2 +
 include/linux/fs.h            |    7 ++++
 include/linux/pagemap.h       |   36 ++++++++++++++++
 include/linux/radix-tree.h    |    2 +-
 include/linux/vm_event_item.h |    2 +-
 kernel/sysctl.c               |    9 ++++
 mm/filemap.c                  |   93 +++++++++++++++++++++++++++++++++++++++--
 mm/page-writeback.c           |    6 +++
 mm/vmstat.c                   |    1 +
 9 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 754813a..a212720 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -634,6 +634,8 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_clear(&mapping->page_tree,
+				page_index(page), PAGECACHE_TAG_REUSE);
 	}
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28814..a2e33e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -68,6 +68,7 @@ extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
+extern unsigned long sysctl_cache_reuse_ratio;
 
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -412,6 +413,9 @@ struct address_space_operations {
 	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
 				sector_t *span);
 	void (*swap_deactivate)(struct file *file);
+
+	/* reuse mapping page support */
+	struct page *(*reuse_mapping_page)(struct address_space *, gfp_t);
 };
 
 extern const struct address_space_operations empty_aops;
@@ -497,6 +501,7 @@ struct block_device {
 #define PAGECACHE_TAG_DIRTY	0
 #define PAGECACHE_TAG_WRITEBACK	1
 #define PAGECACHE_TAG_TOWRITE	2
+#define PAGECACHE_TAG_REUSE	3
 
 int mapping_tagged(struct address_space *mapping, int tag);
 
@@ -2762,6 +2767,8 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
+extern struct page *generic_reuse_mapping_page(struct address_space *mapping,
+		gfp_t gfp_mask);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9735410..c454dbb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -76,6 +76,16 @@ static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
 	return mapping_gfp_mask(mapping) & gfp_mask;
 }
 
+static inline struct page *mapping_reuse_page(struct address_space *mapping,
+				gfp_t gfp_mask)
+{
+	if (unlikely(mapping_unevictable(mapping)))
+		return NULL;
+	if (mapping->a_ops->reuse_mapping_page)
+		return mapping->a_ops->reuse_mapping_page(mapping, gfp_mask);
+	return NULL;
+}
+
 /*
  * This is non-atomic.  Only to be used before the mapping is activated.
  * Probably needs a barrier...
@@ -201,11 +211,21 @@ static inline struct page *__page_cache_alloc(gfp_t gfp)
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
+	struct page *page;
+
+	page = mapping_reuse_page(x, mapping_gfp_mask(x));
+	if (page)
+		return page;
 	return __page_cache_alloc(mapping_gfp_mask(x));
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
+	struct page *page;
+
+	page = mapping_reuse_page(x, mapping_gfp_mask(x)|__GFP_COLD);
+	if (page)
+		return page;
 	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
 
@@ -215,6 +235,22 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x)
 				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
 }
 
+static inline struct page *page_cache_alloc_fault(struct address_space *x)
+{
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
+}
+
+static inline struct page *page_cache_alloc_reuse(struct address_space *x,
+			     gfp_t gfp)
+{
+	struct page *page;
+
+	page = mapping_reuse_page(x, gfp);
+	if (page)
+		return page;
+	return __page_cache_alloc(gfp);
+}
+
 typedef int filler_t(void *, struct page *);
 
 pgoff_t page_cache_next_hole(struct address_space *mapping,
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index cb4b7e8..2e5aa47 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -64,7 +64,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 /*** radix-tree API starts here ***/
 
-#define RADIX_TREE_MAX_TAGS 3
+#define RADIX_TREE_MAX_TAGS 4
 
 #ifndef RADIX_TREE_MAP_SHIFT
 #define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ec08432..a34b456 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -37,7 +37,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
-		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+		PAGEOUTRUN, ALLOCSTALL, PGROTATED, PGREUSED,
 		DROP_PAGECACHE, DROP_SLAB,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 87b2fc3..35e3c7d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1334,6 +1334,15 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "cache_reuse_ratio",
+		.data		= &sysctl_cache_reuse_ratio,
+		.maxlen		= sizeof(sysctl_cache_reuse_ratio),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	{
 		.procname	= "nr_hugepages",
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878..f0fed97 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,6 +47,8 @@
 
 #include <asm/mman.h>
 
+unsigned long sysctl_cache_reuse_ratio = 100;
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -110,6 +112,18 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static unsigned long page_cache_over_reuse_limit(void)
+{
+	unsigned long lru_file, limit;
+
+	limit = totalram_pages * sysctl_cache_reuse_ratio / 100;
+	lru_file = global_page_state(NR_ACTIVE_FILE)
+		+ global_page_state(NR_INACTIVE_FILE);
+	if (lru_file > limit)
+		return lru_file - limit;
+	return 0;
+}
+
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -1194,7 +1208,7 @@ no_page:
 		if (fgp_flags & FGP_NOFS)
 			gfp_mask &= ~__GFP_FS;
 
-		page = __page_cache_alloc(gfp_mask);
+		page = page_cache_alloc_reuse(mapping, gfp_mask);
 		if (!page)
 			return NULL;
 
@@ -1784,6 +1798,13 @@ readpage:
 			unlock_page(page);
 		}
 
+		if (!page_mapcount(page)) {
+			spin_lock_irq(&mapping->tree_lock);
+			radix_tree_tag_set(&mapping->page_tree,
+					index, PAGECACHE_TAG_REUSE);
+			spin_unlock_irq(&mapping->tree_lock);
+		}
+
 		goto page_ok;
 
 readpage_error:
@@ -1899,7 +1920,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
 	int ret;
 
 	do {
-		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+		page = page_cache_alloc_fault(mapping, gfp_mask|__GFP_COLD);
 		if (!page)
 			return -ENOMEM;
 
@@ -2270,6 +2291,72 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
 EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
 
+struct page *generic_reuse_mapping_page(struct address_space *mapping,
+				gfp_t gfp_mask)
+{
+	int i;
+	pgoff_t index = 0;
+	struct page *p = NULL;
+	struct pagevec pvec;
+
+	if (!page_cache_over_reuse_limit())
+		return NULL;
+	if (unlikely(!mapping->nrpages))
+		return NULL;
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_REUSE))
+		return NULL;
+	lru_add_drain();
+	pagevec_init(&pvec, 0);
+	while (!p && pagevec_lookup_tag(&pvec, mapping, &index,
+		PAGECACHE_TAG_REUSE, PAGEVEC_SIZE)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+
+			if (PageActive(page))
+				continue;
+			if (PageDirty(page))
+				continue;
+			if (page_mapcount(page))
+				continue;
+			if (!trylock_page(page))
+				continue;
+			if (unlikely(page_mapping(page) != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+			if (invalidate_inode_page(page)) {
+				if (likely(!isolate_lru_page(page))) {
+					get_page(page);
+					ClearPageUptodate(page);
+					WARN_ON(TestClearPageDirty(page));
+					WARN_ON(TestClearPageWriteback(page));
+					WARN_ON(TestClearPageActive(page));
+					WARN_ON(TestClearPageUnevictable(page));
+					ClearPageError(page);
+					ClearPageReferenced(page);
+					ClearPageReclaim(page);
+					ClearPageMappedToDisk(page);
+					ClearPageReadahead(page);
+					unlock_page(page);
+					count_vm_event(PGREUSED);
+					p = page;
+					break;
+				}
+			} else {
+				spin_lock_irq(&mapping->tree_lock);
+				radix_tree_tag_clear(&mapping->page_tree,
+					page->index, PAGECACHE_TAG_REUSE);
+				spin_unlock_irq(&mapping->tree_lock);
+			}
+			unlock_page(page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	return p;
+}
+EXPORT_SYMBOL(generic_reuse_mapping_page);
+
 static struct page *wait_on_page_read(struct page *page)
 {
 	if (!IS_ERR(page)) {
@@ -2293,7 +2380,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = __page_cache_alloc(gfp | __GFP_COLD);
+		page = page_cache_alloc_reuse(mapping, gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b9956fd..0709df3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2490,6 +2490,8 @@ int __set_page_dirty_nobuffers(struct page *page)
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree, page_index(page),
 				   PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+				   PAGECACHE_TAG_REUSE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		unlock_page_memcg(page);
 
@@ -2737,6 +2739,10 @@ int test_clear_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
+			if (!PageSwapBacked(page) && !page_mapcount(page))
+				radix_tree_tag_set(&mapping->page_tree,
+							page_index(page),
+							PAGECACHE_TAG_REUSE);
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 77e42ef..273cd1d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -773,6 +773,7 @@ const char * const vmstat_text[] = {
 	"allocstall",
 
 	"pgrotated",
+	"pgreused",
 
 	"drop_pagecache",
 	"drop_slab",
-- 
1.7.9.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]