[PATCH] Fix ext2 and ext4 buffer-head accounting.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Pages pinned by the block group descriptors in the super_block are non-reclaimable.
These pages show up as file-backed in meminfo, which confuses user programs into
issuing too many drop_caches/ttfp requests even though this memory will never be freed.

This change stops accounting for the filesystem descriptors by taking their pages
off the LRU and decrementing the NR_FILE_PAGES counter. The pages are put back when
the filesystem is unmounted.

Signed-off-by: Ying Han <yinghan@xxxxxxxxxx>
Signed-off-by: Paul Turner <pjt@xxxxxxxxxx>
---
 fs/buffer.c                 |   44 +++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/super.c             |   15 +++++++++++++-
 fs/ext4/super.c             |   12 ++++++++++-
 include/linux/buffer_head.h |    5 ++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 3e7dca2..677d5f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -22,6 +22,8 @@
 #include <linux/syscalls.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/capability.h>
@@ -3314,6 +3316,48 @@ int bh_submit_read(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+void bh_disable_accounting(struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+
+	if (buffer_unaccounted(bh))	/* unlocked fast path: already unaccounted */
+		return;
+
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	/* If someone else is holding it off-LRU we can't safely do anything */
+	if (PageLRU(page)) {
+		BUG_ON(buffer_unaccounted(bh));
+		ClearPageLRU(page);
+		del_page_from_lru(zone, page);
+		__dec_zone_state(zone, NR_FILE_PAGES);
+		set_buffer_unaccounted(bh);	/* flag tested by bh_enable_accounting() */
+	}
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+EXPORT_SYMBOL(bh_disable_accounting);
+
+void bh_enable_accounting(struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+
+	if (!buffer_unaccounted(bh))	/* unlocked fast path: nothing to undo */
+		return;
+
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	if (buffer_unaccounted(bh)) {	/* recheck now that lru_lock is held */
+		SetPageLRU(page);	/* page is re-added as inactive-file below */
+		add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
+		__inc_zone_state(zone, NR_FILE_PAGES);
+		clear_buffer_unaccounted(bh);
+	}
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+EXPORT_SYMBOL(bh_enable_accounting);
+
 void __init buffer_init(void)
 {
 	int nrpages;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1ec6026..a4d21ce 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -29,6 +29,7 @@
 #include <linux/vfs.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/swap.h>
 #include <linux/log2.h>
 #include <linux/quotaops.h>
 #include <asm/uaccess.h>
@@ -135,13 +136,16 @@ static void ext2_put_super (struct super_block * sb)
 	}
 	db_count = sbi->s_gdb_count;
 	for (i = 0; i < db_count; i++)
-		if (sbi->s_group_desc[i])
+		if (sbi->s_group_desc[i]) {
+			bh_enable_accounting(sbi->s_group_desc[i]);
 			brelse (sbi->s_group_desc[i]);
+		}
 	kfree(sbi->s_group_desc);
 	kfree(sbi->s_debts);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
+	bh_enable_accounting(sbi->s_sbh);
 	brelse (sbi->s_sbh);
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
@@ -1080,9 +1084,18 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
 		ext2_msg(sb, KERN_WARNING,
 			"warning: mounting ext3 filesystem as ext2");
+
 	if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY))
 		sb->s_flags |= MS_RDONLY;
 	ext2_write_super(sb);
+
+	/* disable accounting of pinned file pages */
+	lru_add_drain_all();
+	db_count = sbi->s_gdb_count;
+	for (i = 0; i < db_count; i++)
+		bh_disable_accounting(sbi->s_group_desc[i]);
+	bh_disable_accounting(sbi->s_sbh);
+
 	return 0;
 
 cantfind_ext2:
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2614774..5203476 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -32,6 +32,7 @@
 #include <linux/vfs.h>
 #include <linux/random.h>
 #include <linux/mount.h>
+#include <linux/swap.h>
 #include <linux/namei.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
@@ -734,8 +735,10 @@ static void ext4_put_super(struct super_block *sb)
 	}
 	kobject_del(&sbi->s_kobj);
 
-	for (i = 0; i < sbi->s_gdb_count; i++)
+	for (i = 0; i < sbi->s_gdb_count; i++) {
+		bh_enable_accounting(sbi->s_group_desc[i]);
 		brelse(sbi->s_group_desc[i]);
+	}
 	kfree(sbi->s_group_desc);
 	if (is_vmalloc_addr(sbi->s_flex_groups))
 		vfree(sbi->s_flex_groups);
@@ -745,6 +748,7 @@ static void ext4_put_super(struct super_block *sb)
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+	bh_enable_accounting(sbi->s_sbh);
 	brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
@@ -3129,6 +3133,12 @@ no_journal:
 		goto failed_mount4;
 	}
 
+	/* disable accounting of pinned file pages */
+	lru_add_drain_all();
+	for (i = 0; i < db_count; i++)
+		bh_disable_accounting(sbi->s_group_desc[i]);
+	bh_disable_accounting(sbi->s_sbh);
+
 	sbi->s_kobj.kset = ext4_kset;
 	init_completion(&sbi->s_kobj_unregister);
 	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index ec94c12..7d48499 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -34,6 +34,7 @@ enum bh_state_bits {
 	BH_Write_EIO,	/* I/O error on write */
 	BH_Eopnotsupp,	/* operation not supported (barrier) */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
+	BH_Unaccounted, /* Backing page has been removed from accounting */
 	BH_Quiet,	/* Buffer Error Prinks to be quiet */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
@@ -126,6 +127,7 @@ BUFFER_FNS(Boundary, boundary)
 BUFFER_FNS(Write_EIO, write_io_error)
 BUFFER_FNS(Eopnotsupp, eopnotsupp)
 BUFFER_FNS(Unwritten, unwritten)
+BUFFER_FNS(Unaccounted, unaccounted)
 
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
 #define touch_buffer(bh)	mark_page_accessed(bh->b_page)
@@ -234,6 +236,9 @@ int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_writepage(struct page *page, get_block_t *get_block,
                         struct writeback_control *wbc);
 
+void bh_disable_accounting(struct buffer_head *bh);
+void bh_enable_accounting(struct buffer_head *bh);
+
 void buffer_init(void);
 
 /*
-- 
1.7.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxxx  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]