[PATCH 1/2] mm: add MemAvailable to per-node meminfo

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In /proc/meminfo, we can show the sum of all the available memory
as "MemAvailable". Add the same counter also to per-node meminfo
under /sys.

With this counter, some processes that bind nodes can make some
decisions by reading the "MemAvailable" of the corresponding nodes
directly.

Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
---
 drivers/base/node.c    |  4 ++++
 include/linux/mm.h     |  1 +
 include/linux/mmzone.h |  5 +++++
 mm/page_alloc.c        | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 87acc47e8951..deb2a7965ae4 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -375,8 +375,10 @@ static ssize_t node_read_meminfo(struct device *dev,
 	struct sysinfo i;
 	unsigned long sreclaimable, sunreclaimable;
 	unsigned long swapcached = 0;
+	long available;
 
 	si_meminfo_node(&i, nid);
+	available = si_mem_available_node(&i, nid);
 	sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
 	sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
 #ifdef CONFIG_SWAP
@@ -386,6 +388,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 			    "Node %d MemTotal:       %8lu kB\n"
 			    "Node %d MemFree:        %8lu kB\n"
 			    "Node %d MemUsed:        %8lu kB\n"
+			    "Node %d MemAvailable:   %8lu kB\n"
 			    "Node %d SwapCached:     %8lu kB\n"
 			    "Node %d Active:         %8lu kB\n"
 			    "Node %d Inactive:       %8lu kB\n"
@@ -398,6 +401,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 			    nid, K(i.totalram),
 			    nid, K(i.freeram),
 			    nid, K(i.totalram - i.freeram),
+			    nid, K(available),
 			    nid, K(swapcached),
 			    nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
 				   node_page_state(pgdat, NR_ACTIVE_FILE)),
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1d4f731a8e18..34a5f5df388b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2532,6 +2532,7 @@ extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(unsigned int flags, nodemask_t *nodemask);
 extern long si_mem_available(void);
+extern long si_mem_available_node(struct sysinfo *val, int nid);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 #ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 936dc0b6c226..321c12f6272f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1132,6 +1132,11 @@ extern struct zone *next_zone(struct zone *zone);
 			; /* do nothing */		\
 		else
 
+#define for_each_pgdat_zone(pgdat, zone)				\
+	for (zone = (pgdat)->node_zones;				\
+	     zone < (pgdat)->node_zones + MAX_NR_ZONES - 1 && zone;	\
+	     zone++)
+
 static inline struct zone *zonelist_zone(struct zoneref *zoneref)
 {
 	return zoneref->zone;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index edfd6c81af82..31f5e3e335cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5731,6 +5731,55 @@ static inline void show_node(struct zone *zone)
 		printk("Node %d ", zone_to_nid(zone));
 }
 
+/**
+ * si_mem_available_node - helper to calculate the size of available memory
+ *			   of the given node
+ * @val: pointer to struct sysinfo
+ * @nid: the node id
+ */
+long si_mem_available_node(struct sysinfo *val, int nid)
+{
+	long available;
+	unsigned long pagecache;
+	unsigned long reclaimable;
+	unsigned long wmark_low = 0;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	struct zone *zone;
+
+	for_each_pgdat_zone(pgdat, zone)
+		wmark_low += low_wmark_pages(zone);
+
+	/*
+	 * Estimate the amount of memory available for userspace allocations,
+	 * without causing swapping.
+	 */
+	available = val->freeram - pgdat->totalreserve_pages;
+
+	/*
+	 * Not all the page cache can be freed, otherwise the system will
+	 * start swapping. Assume at least half of the page cache, or the
+	 * low watermark worth of cache, needs to stay.
+	 */
+	pagecache = node_page_state(pgdat, NR_ACTIVE_FILE) +
+		    node_page_state(pgdat, NR_INACTIVE_FILE);
+	pagecache -= min(pagecache / 2, wmark_low);
+	available += pagecache;
+
+	/*
+	 * Part of the reclaimable slab and other kernel memory consists of
+	 * items that are in use, and cannot be freed. Cap this estimate at the
+	 * low watermark.
+	 */
+	reclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) +
+		      node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE);
+	reclaimable -= min(reclaimable / 2, wmark_low);
+	available += reclaimable;
+
+	if (available < 0)
+		available = 0;
+	return available;
+}
+
 long si_mem_available(void)
 {
 	long available;
-- 
2.11.0





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux