Pass memcg's nodemask to try_to_free_pages(). try_to_free_pages() can take a nodemask as its argument, but memcg doesn't pass one. Considering that memcg can be used with cpuset on big NUMA machines, memcg should pass a nodemask if one is available. memcg now maintains a nodemask with periodic updates, so pass it. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> --- include/linux/memcontrol.h | 2 +- mm/memcontrol.c | 8 ++++++-- mm/vmscan.c | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) Index: mmotm-0710/include/linux/memcontrol.h =================================================================== --- mmotm-0710.orig/include/linux/memcontrol.h +++ mmotm-0710/include/linux/memcontrol.h @@ -117,7 +117,7 @@ extern void mem_cgroup_end_migration(str */ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg); int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg); -int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); +int mem_cgroup_select_victim_node(struct mem_cgroup *memcg, nodemask_t **mask); unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, unsigned int lrumask); struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, Index: mmotm-0710/mm/memcontrol.c =================================================================== --- mmotm-0710.orig/mm/memcontrol.c +++ mmotm-0710/mm/memcontrol.c @@ -1602,10 +1602,11 @@ static void mem_cgroup_may_update_nodema * * Now, we use round-robin. Better algorithm is welcomed. 
*/ -int mem_cgroup_select_victim_node(struct mem_cgroup *mem) +int mem_cgroup_select_victim_node(struct mem_cgroup *mem, nodemask_t **mask) { int node; + *mask = NULL; mem_cgroup_may_update_nodemask(mem); node = mem->last_scanned_node; @@ -1620,6 +1621,8 @@ int mem_cgroup_select_victim_node(struct */ if (unlikely(node == MAX_NUMNODES)) node = numa_node_id(); + else + *mask = &mem->scan_nodes; mem->last_scanned_node = node; return node; @@ -1667,8 +1670,9 @@ static void mem_cgroup_numascan_init(str } #else -int mem_cgroup_select_victim_node(struct mem_cgroup *mem) +int mem_cgroup_select_victim_node(struct mem_cgroup *mem, nodemask_t **mask) { + *mask = NULL; return 0; } Index: mmotm-0710/mm/vmscan.c =================================================================== --- mmotm-0710.orig/mm/vmscan.c +++ mmotm-0710/mm/vmscan.c @@ -2280,6 +2280,7 @@ unsigned long try_to_free_mem_cgroup_pag unsigned long nr_reclaimed; unsigned long start, end; int nid; + nodemask_t *mask; struct scan_control sc = { .may_writepage = !laptop_mode, .may_unmap = 1, @@ -2302,7 +2303,7 @@ unsigned long try_to_free_mem_cgroup_pag * take care of from where we get pages. So the node where we start the * scan does not need to be the current node. */ - nid = mem_cgroup_select_victim_node(mem_cont); + nid = mem_cgroup_select_victim_node(mem_cont, &mask); zonelist = NODE_DATA(nid)->node_zonelists; -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>