In /proc/meminfo, we can show the sum of all the available memory as "MemAvailable". Add the same counter also to per-node meminfo under /sys. With this counter, some processes that bind nodes can make some decisions by reading the "MemAvailable" of the corresponding nodes directly. Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx> --- drivers/base/node.c | 4 ++++ include/linux/mm.h | 1 + include/linux/mmzone.h | 5 +++++ mm/page_alloc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index 87acc47e8951..deb2a7965ae4 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -375,8 +375,10 @@ static ssize_t node_read_meminfo(struct device *dev, struct sysinfo i; unsigned long sreclaimable, sunreclaimable; unsigned long swapcached = 0; + long available; si_meminfo_node(&i, nid); + available = si_mem_available_node(&i, nid); sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B); sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B); #ifdef CONFIG_SWAP @@ -386,6 +388,7 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" "Node %d MemUsed: %8lu kB\n" + "Node %d MemAvailable: %8lu kB\n" "Node %d SwapCached: %8lu kB\n" "Node %d Active: %8lu kB\n" "Node %d Inactive: %8lu kB\n" @@ -398,6 +401,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), + nid, K(available), nid, K(swapcached), nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + node_page_state(pgdat, NR_ACTIVE_FILE)), diff --git a/include/linux/mm.h b/include/linux/mm.h index 1d4f731a8e18..34a5f5df388b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2532,6 +2532,7 @@ extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags, nodemask_t *nodemask); extern long si_mem_available(void); +extern long si_mem_available_node(struct sysinfo *val, int nid); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); #ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 936dc0b6c226..321c12f6272f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1132,6 +1132,11 @@ extern struct zone *next_zone(struct zone *zone); ; /* do nothing */ \ else +#define for_each_pgdat_zone(pgdat, zone) \ + for (zone = (pgdat)->node_zones; \ + zone < (pgdat)->node_zones + MAX_NR_ZONES - 1 && zone; \ + zone++) + static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index edfd6c81af82..31f5e3e335cf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5731,6 +5731,55 @@ static inline void show_node(struct zone *zone) printk("Node %d ", zone_to_nid(zone)); } +/** + * si_mem_available_node - helper to calculate the size of available memory + * of the given node + * @val: pointer to struct sysinfo + * @nid: the node id + */ +long si_mem_available_node(struct sysinfo *val, int nid) +{ + long available; + unsigned long pagecache; + unsigned long reclaimable; + unsigned long wmark_low = 0; + struct pglist_data *pgdat = NODE_DATA(nid); + struct zone *zone; + + for_each_pgdat_zone(pgdat, zone) + wmark_low += low_wmark_pages(zone); + + /* + * Estimate the amount of memory available for userspace allocations, + * without causing swapping. + */ + available = val->freeram - pgdat->totalreserve_pages; + + /* + * Not all the page cache can be freed, otherwise the system will + * start swapping. Assume at least half of the page cache, or the + * low watermark worth of cache, needs to stay. + */ + pagecache = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + pagecache -= min(pagecache / 2, wmark_low); + available += pagecache; + + /* + * Part of the reclaimable slab and other kernel memory consists of + * items that are in use, and cannot be freed. Cap this estimate at the + * low watermark. + */ + reclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) + + node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE); + reclaimable -= min(reclaimable / 2, wmark_low); + available += reclaimable; + + if (available < 0) + available = 0; + return available; +} + long si_mem_available(void) { long available; -- 2.11.0