Earlier patches focused on having direct reclaim and kswapd use data that is node-centric for reclaiming but shrink_node() itself still uses too much zone information. This patch removes unnecessary zone-based information with the most important decision being whether to continue reclaim or not. Some memcg APIs are adjusted as a result even though memcg itself still uses some zone information. Signed-off-by: Mel Gorman <mgorman@xxxxxxx> --- include/linux/memcontrol.h | 9 ++++---- include/linux/mmzone.h | 4 ++-- include/linux/swap.h | 2 +- mm/memcontrol.c | 17 ++++++++------- mm/page_alloc.c | 2 +- mm/vmscan.c | 53 ++++++++++++++++++++++++++-------------------- 6 files changed, 48 insertions(+), 39 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index df225059daf3..b1ba7f5b3851 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -84,7 +84,8 @@ void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, bool lrucare); -struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); +struct lruvec *mem_cgroup_lruvec(struct pglist_data *, struct zone *zone, + struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, @@ -240,10 +241,10 @@ static inline void mem_cgroup_migrate(struct page *oldpage, { } -static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, - struct mem_cgroup *memcg) +static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, + struct zone *zone, struct mem_cgroup *memcg) { - return zone_lruvec(zone); + return node_lruvec(pgdat); } static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fab74af19f26..1830c2180555 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -799,9 +799,9 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone) return &zone->zone_pgdat->lru_lock; } -static inline struct lruvec *zone_lruvec(struct zone *zone) +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) { - return &zone->zone_pgdat->lruvec; + return &pgdat->lruvec; } static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7067eca501e2..bb9597213e39 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -323,7 +323,7 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, bool may_swap); -extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, +extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, struct zone *zone, unsigned long *nr_scanned); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 10eed58506a0..7c39930c8d86 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1170,22 +1170,23 @@ out: EXPORT_SYMBOL(__mem_cgroup_count_vm_event); /** - * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg + * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone + * @node: node of the wanted lruvec * @zone: zone of the wanted lruvec * @memcg: memcg of the wanted lruvec * - * Returns the lru list vector holding pages for the given @zone and - * @mem. This can be the global zone lruvec, if the memory controller + * Returns the lru list vector holding pages for a given @node or a given + * @memcg and @zone. This can be the node lruvec, if the memory controller * is disabled. */ -struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, - struct mem_cgroup *memcg) +struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, + struct zone *zone, struct mem_cgroup *memcg) { struct mem_cgroup_per_zone *mz; struct lruvec *lruvec; if (mem_cgroup_disabled()) { - lruvec = zone_lruvec(zone); + lruvec = node_lruvec(pgdat); goto out; } @@ -1721,8 +1722,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, } continue; } - total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, - zone, &nr_scanned); + total += mem_cgroup_shrink_node(victim, gfp_mask, false, + zone, &nr_scanned); *total_scanned += nr_scanned; if (!soft_limit_excess(root_memcg)) break; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 49a29e8ae493..34201c141916 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4879,6 +4879,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); pgdat_page_ext_init(pgdat); + lruvec_init(node_lruvec(pgdat)); for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; @@ -4948,7 +4949,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, /* For bootup, initialized properly in watermark setup */ mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); - lruvec_init(zone_lruvec(zone)); if (!size) continue; diff --git a/mm/vmscan.c b/mm/vmscan.c index e069decbcfa1..3a6a2fac48e5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2278,13 +2278,14 @@ static bool in_reclaim_compaction(struct scan_control *sc) * calls try_to_compact_zone() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct zone *zone, +static inline bool should_continue_reclaim(struct pglist_data *pgdat, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; + int z; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) @@ -2318,21 +2319,27 @@ static inline bool should_continue_reclaim(struct zone *zone, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE); + inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_nr_swap_pages() > 0) - inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON); + inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(zone, sc->order, 0, 0)) { - case COMPACT_PARTIAL: - case COMPACT_CONTINUE: - return false; - default: - return true; + for (z = 0; z <= sc->reclaim_idx; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + switch (compaction_suitable(zone, sc->order, 0, 0)) { + case COMPACT_PARTIAL: + case COMPACT_CONTINUE: + return false; + default: + /* check next zone */ + ; + } } + return true; } static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, @@ -2342,15 +2349,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_reclaimed, nr_scanned; bool reclaimable = false; - struct zone *zone = &pgdat->node_zones[classzone_idx]; do { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, + .zone = &pgdat->node_zones[classzone_idx], .priority = sc->priority, }; - unsigned long zone_lru_pages = 0; + unsigned long node_lru_pages = 0; struct mem_cgroup *memcg; nr_reclaimed = sc->nr_reclaimed; @@ -2369,23 +2375,24 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, mem_cgroup_events(memcg, MEMCG_LOW, 1); } - lruvec = mem_cgroup_zone_lruvec(zone, memcg); + lruvec = mem_cgroup_lruvec(pgdat, + &pgdat->node_zones[reclaim_idx], memcg); swappiness = mem_cgroup_swappiness(memcg); scanned = sc->nr_scanned; sc->reclaim_idx = reclaim_idx; shrink_lruvec(lruvec, swappiness, sc, &lru_pages); - zone_lru_pages += lru_pages; + node_lru_pages += lru_pages; if (!global_reclaim(sc) && reclaim_idx == classzone_idx) - shrink_slab(sc->gfp_mask, zone_to_nid(zone), + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->nr_scanned - scanned, lru_pages); /* * Direct reclaim and kswapd have to scan all memory * cgroups to fulfill the overall scan target for the - * zone. + * node. * * Limit reclaim, on the other hand, only cares about * nr_to_reclaim pages to be reclaimed and it will @@ -2404,9 +2411,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, * the eligible LRU pages were scanned. */ if (global_reclaim(sc) && reclaim_idx == classzone_idx) - shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL, + shrink_slab(sc->gfp_mask, pgdat->node_id, NULL, sc->nr_scanned - nr_scanned, - zone_lru_pages); + node_lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; @@ -2420,7 +2427,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, if (sc->nr_reclaimed - nr_reclaimed) reclaimable = true; - } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, + } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); return reclaimable; @@ -2822,7 +2829,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_MEMCG -unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, +unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, struct zone *zone, unsigned long *nr_scanned) @@ -2834,7 +2841,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !noswap, }; - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + struct lruvec *lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg); int swappiness = mem_cgroup_swappiness(memcg); unsigned long lru_pages; @@ -2848,7 +2855,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, /* * NOTE: Although we can get the priority field, using it * here is not a good idea, since it limits the pages we can scan. - * if we don't reclaim here, the shrink_zone from balance_pgdat + * if we don't reclaim here, the shrink_node from balance_pgdat * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ @@ -2910,7 +2917,7 @@ static void age_active_anon(struct pglist_data *pgdat, memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg); if (inactive_anon_is_low(lruvec)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, -- 2.3.5 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>