From: Khalid Mughal <khalidm@xxxxxxxxx> Currently there is no way to figure out the droppable pagecache size from the meminfo output. The MemFree size can shrink during normal system operation, when some of the memory pages get cached and is reflected in "Cached" field. Similarly for file operations some of the buffer memory gets cached and it is reflected in "Buffers" field. The kernel automatically reclaims all this cached & buffered memory, when it is needed elsewhere on the system. The only way to manually reclaim this memory is by writing 1 to /proc/sys/vm/drop_caches. But this can have performance impact. Since it discards cached objects, it may cause high CPU & I/O utilization to recreate the dropped objects during heavy system load. This patch computes the droppable pagecache count, using same algorithm as "vm/drop_caches". It is non-destructive and does not drop any pages. Therefore it does not have any impact on system performance. The computation does not include the size of reclaimable slab. Cc: xe-kernel@xxxxxxxxxxxxxxxxxx Cc: dave.hansen@xxxxxxxxx Cc: hannes@xxxxxxxxxxx Cc: riel@xxxxxxxxxx Signed-off-by: Khalid Mughal <khalidm@xxxxxxxxx> Signed-off-by: Daniel Walker <danielwa@xxxxxxxxx> --- Documentation/sysctl/vm.txt | 12 +++++++ fs/drop_caches.c | 80 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/mm.h | 3 ++ kernel/sysctl.c | 7 ++++ 4 files changed, 100 insertions(+), 2 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 89a887c..13a501c 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm: - dirty_ratio - dirty_writeback_centisecs - drop_caches +- drop_caches_count - extfrag_threshold - hugepages_treat_as_movable - hugetlb_shm_group @@ -224,6 +225,17 @@ with your system. To disable them, echo 4 (bit 3) into drop_caches. ============================================================== +drop_caches_count + +The amount of droppable pagecache (in kilobytes). Reading this file +performs same calculation as writing 1 to /proc/sys/vm/drop_caches. +The actual pages are not dropped during computation of this value. + +To read the value: + cat /proc/sys/vm/drop_caches_count + +============================================================== + extfrag_threshold This parameter affects whether the kernel will compact memory or direct diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b..0cb2186 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -8,12 +8,73 @@ #include <linux/writeback.h> #include <linux/sysctl.h> #include <linux/gfp.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/pagemap.h> +#include <linux/pagevec.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/vmstat.h> +#include <linux/blkdev.h> + #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ + int sysctl_drop_caches; +unsigned int sysctl_drop_caches_count; + +static int is_page_droppable(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + + if (!mapping) + return 0; + if (PageDirty(page)) + return 0; + if (PageWriteback(page)) + return 0; + if (page_mapped(page)) + return 0; + if (page->mapping != mapping) + return 0; + if (page_has_private(page)) + return 0; + return 1; +} + +static unsigned long count_unlocked_pages(struct address_space *mapping) +{ + struct pagevec pvec; + pgoff_t start = 0; + pgoff_t end = -1; + unsigned long count = 0; + int i; + int rc; + + pagevec_init(&pvec, 0); + while (start <= end && pagevec_lookup(&pvec, mapping, start, + min(end - start, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + start = page->index; + if (start > end) + break; + if (!trylock_page(page)) + continue; + WARN_ON(page->index != start); + rc = is_page_droppable(page); + unlock_page(page); + count += rc; + } + pagevec_release(&pvec); + cond_resched(); + start++; + } + return count; +} -static void drop_pagecache_sb(struct super_block *sb, void *unused) +static void drop_pagecache_sb(struct super_block *sb, void *count) { struct inode *inode, *toput_inode = NULL; @@ -29,7 +90,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); - invalidate_mapping_pages(inode->i_mapping, 0, -1); + if (count) + sysctl_drop_caches_count += count_unlocked_pages(inode->i_mapping); + else + invalidate_mapping_pages(inode->i_mapping, 0, -1); + iput(toput_inode); toput_inode = inode; @@ -67,3 +132,14 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, } return 0; } + +int drop_caches_count_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int ret = 0; + sysctl_drop_caches_count = nr_blockdev_pages(); + iterate_supers(drop_pagecache_sb, &sysctl_drop_caches_count); + sysctl_drop_caches_count <<= (PAGE_SHIFT - 10); /* count in KBytes */ + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + return ret; +} diff --git a/include/linux/mm.h b/include/linux/mm.h index f1cd22f..02ebd41 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2220,8 +2220,11 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) #ifdef CONFIG_SYSCTL extern int sysctl_drop_caches; +extern unsigned int sysctl_drop_caches_count; int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int drop_caches_count_sysctl_handler(struct ctl_table *, int, + void __user *, size_t *, loff_t *); #endif void drop_slab(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 97715fd..c043175 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1356,6 +1356,13 @@ static struct ctl_table vm_table[] = { .extra1 = &one, .extra2 = &four, }, + { + .procname = "drop_caches_count", + .data = &sysctl_drop_caches_count, + .maxlen = sizeof(unsigned int), + .mode = 0444, + .proc_handler = drop_caches_count_sysctl_handler, + }, #ifdef CONFIG_COMPACTION { .procname = "compact_memory", -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html