+ zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     ZVC/zone_reclaim: Leave 1% of unmapped pagecache pages for file I/O (tunable)

has been added to the -mm tree.  Its filename is

     zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this.

------------------------------------------------------
Subject: ZVC/zone_reclaim: Leave 1% of unmapped pagecache pages for file I/O (tunable)
From: Christoph Lameter <clameter@xxxxxxx>

zone_reclaim: proc limit for the minimal amount of unmapped pagecache pages

Add /proc/sys/vm/min_unmapped to control the minimum amount of unmapped
pagecache, expressed as a percentage of the pages in each zone.  Zone reclaim
will only be triggered if more than that many unmapped pages exist in a zone.

And remove some outdated comments.

Signed-off-by: Christoph Lameter <clameter@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 Documentation/sysctl/vm.txt |   12 ++++++++++++
 include/linux/mmzone.h      |    6 ++++++
 include/linux/swap.h        |    1 +
 include/linux/sysctl.h      |    2 +-
 kernel/sysctl.c             |   11 +++++++++++
 mm/page_alloc.c             |   22 ++++++++++++++++++++++
 mm/vmscan.c                 |   21 +++++++++------------
 7 files changed, 62 insertions(+), 13 deletions(-)

diff -puN Documentation/sysctl/vm.txt~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable Documentation/sysctl/vm.txt
--- a/Documentation/sysctl/vm.txt~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/Documentation/sysctl/vm.txt
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/
 - block_dump
 - drop-caches
 - zone_reclaim_mode
+- min_unmapped
 - panic_on_oom
 
 ==============================================================
@@ -168,6 +169,17 @@ in all nodes of the system.
 
 =============================================================
 
+min_unmapped:
+
+A percentage of the total pages in each zone. Zone reclaim will only
+occur if more than this percentage of pages are file backed and unmapped.
+This is to ensure that a minimal amount of local pages is still available
+for file I/O even if the node is overallocated.
+
+The default is 1 percent.
+
+=============================================================
+
 panic_on_oom
 
 This enables or disables panic on out-of-memory feature.  If this is set to 1,
diff -puN include/linux/mmzone.h~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable include/linux/mmzone.h
--- a/include/linux/mmzone.h~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/include/linux/mmzone.h
@@ -150,6 +150,10 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
+	/*
+	 * zone reclaim becomes active if more unmapped pages than this exist.
+	 */
+	unsigned long		min_unmapped;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(
 					void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
 					void __user *, size_t *, loff_t *);
+int sysctl_min_unmapped_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
diff -puN include/linux/swap.h~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable include/linux/swap.h
--- a/include/linux/swap.h~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/include/linux/swap.h
@@ -189,6 +189,7 @@ extern long vm_total_pages;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
+extern int sysctl_min_unmapped;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
diff -puN include/linux/sysctl.h~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable include/linux/sysctl.h
--- a/include/linux/sysctl.h~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/include/linux/sysctl.h
@@ -188,7 +188,7 @@ enum
 	VM_DROP_PAGECACHE=29,	/* int: nuke lots of pagecache */
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
-	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_MIN_UNMAPPED=32,	/* Set min percent of unmapped pages */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
 };
diff -puN kernel/sysctl.c~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable kernel/sysctl.c
--- a/kernel/sysctl.c~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/kernel/sysctl.c
@@ -933,6 +933,17 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 	},
+	{
+		.ctl_name	= VM_MIN_UNMAPPED,
+		.procname	= "min_unmapped",
+		.data		= &sysctl_min_unmapped,
+		.maxlen		= sizeof(sysctl_min_unmapped),
+		.mode		= 0644,
+		.proc_handler	= &sysctl_min_unmapped_sysctl_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #endif
 #ifdef CONFIG_X86_32
 	{
diff -puN mm/page_alloc.c~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable mm/page_alloc.c
--- a/mm/page_alloc.c~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/mm/page_alloc.c
@@ -2006,6 +2006,10 @@ static void __meminit free_area_init_cor
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
+#ifdef CONFIG_NUMA
+		zone->min_unmapped = (realsize * sysctl_min_unmapped)
+						/ 100;
+#endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
 		spin_lock_init(&zone->lru_lock);
@@ -2299,6 +2303,24 @@ int min_free_kbytes_sysctl_handler(ctl_t
 	return 0;
 }
 
+#ifdef CONFIG_NUMA
+int sysctl_min_unmapped_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	for_each_zone(zone)
+		zone->min_unmapped = (zone->present_pages *
+				sysctl_min_unmapped) / 100;
+	return 0;
+}
+#endif
+
 /*
  * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
  *	proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
diff -puN mm/vmscan.c~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable mm/vmscan.c
--- a/mm/vmscan.c~zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable
+++ a/mm/vmscan.c
@@ -1503,10 +1503,6 @@ module_init(kswapd_init)
  *
  * If non-zero call zone_reclaim when the number of free pages falls below
  * the watermarks.
- *
- * In the future we may add flags to the mode. However, the page allocator
- * should only have to check that zone_reclaim_mode != 0 before calling
- * zone_reclaim().
  */
 int zone_reclaim_mode __read_mostly;
 
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly;
 #define ZONE_RECLAIM_PRIORITY 4
 
 /*
+ * Percentage of pages in a zone that must be unmapped
+ * for zone_reclaim to occur.
+ */
+int sysctl_min_unmapped = 1;
+
+/*
  * Try to free up some pages from this zone through reclaim.
  */
 static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1595,16 +1597,11 @@ int zone_reclaim(struct zone *zone, gfp_
 	 * A small portion of unmapped file backed pages is needed for
 	 * file I/O otherwise pages read by file I/O will be immediately
 	 * thrown out if the zone is overallocated. So we do not reclaim
-	 * if less than 1% of the zone is used by unmapped file backed pages.
-	 *
-	 * The division by 128 approximates this and is here because a division
-	 * would be too expensive in this hot code path.
-	 *
-	 * Is it be useful to have a way to set the limit via /proc?
+	 * if less than a specified percentage of the zone is used by
+	 * unmapped file backed pages.
 	 */
 	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) <
-			zone->present_pages / 128)
+		zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped)
 				return 0;
 
 	/*
_

Patches currently in -mm which might be from clameter@xxxxxxx are

origin.patch
zoned-vm-counters-create-vmstatc-h-from-page_allocc-h.patch
zoned-vm-counters-basic-zvc-zoned-vm-counter-implementation.patch
zoned-vm-counters-convert-nr_mapped-to-per-zone-counter.patch
zoned-vm-counters-conversion-of-nr_pagecache-to-per-zone-counter.patch
zoned-vm-counters-remove-nr_file_mapped-from-scan-control-structure.patch
zoned-vm-counters-split-nr_anon_pages-off-from-nr_file_mapped.patch
zoned-vm-counters-zone_reclaim-remove-proc-sys-vm-zone_reclaim_interval.patch
zoned-vm-counters-conversion-of-nr_slab-to-per-zone-counter.patch
zoned-vm-counters-conversion-of-nr_pagetables-to-per-zone-counter.patch
zoned-vm-counters-conversion-of-nr_dirty-to-per-zone-counter.patch
zoned-vm-counters-conversion-of-nr_writeback-to-per-zone-counter.patch
zoned-vm-counters-conversion-of-nr_unstable-to-per-zone-counter.patch
zoned-vm-counters-conversion-of-nr_bounce-to-per-zone-counter.patch
zoned-vm-counters-remove-useless-struct-wbs.patch
use-zoned-vm-counters-for-numa-statistics-v3.patch
light-weight-event-counters-v5.patch
slab-consolidate-code-to-free-slabs-from-freelist.patch
usb-remove-empty-destructor-from-drivers-usb-mon-mon_textc.patch
zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o.patch
zvc-zone_reclaim-leave-1%-of-unmapped-pagecache-pages-for-file-i-o-tunable.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux