When demoting to the migration target node, the target node may be under memory pressure, which may cause migrate_pages() to fail. If the failure is caused by memory pressure (i.e. migrate_pages() returns -ENOMEM), tag the node with PGDAT_CONTENDED. The tag is cleared once the target node is balanced again. Before demoting, check whether the target node is PGDAT_CONTENDED; if it is, skip demotion. Signed-off-by: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx> --- include/linux/mmzone.h | 3 +++ mm/vmscan.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 70394ca..d4e05c5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -573,6 +573,9 @@ enum pgdat_flags { * many pages under writeback */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ + PGDAT_CONTENDED, /* the node has not enough free memory + * available + */ }; enum zone_flags { diff --git a/mm/vmscan.c b/mm/vmscan.c index fb931ded..9ec55d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1126,6 +1126,21 @@ static inline struct page *alloc_demote_page(struct page *page, } #endif +static inline bool is_migration_target_contended(int nid) +{ + int node; + nodemask_t used_mask; + + + nodes_clear(used_mask); + node = find_next_best_node(nid, &used_mask, true); + + if (test_bit(PGDAT_CONTENDED, &NODE_DATA(node)->flags)) + return true; + + return false; +} + static inline bool is_demote_ok(int nid, struct scan_control *sc) { /* Just do demotion with migrate mode of node reclaim */ @@ -1144,6 +1159,10 @@ static inline bool is_demote_ok(int nid, struct scan_control *sc) if (!has_migration_target_node_online()) return false; + /* Check if the demote target node is contended or not */ + if (is_migration_target_contended(nid)) + return false; + return true; } @@ -1564,6 +1583,10 @@ static unsigned long shrink_page_list(struct list_head *page_list, nr_reclaimed += nr_succeeded; if (err) { + if (err == -ENOMEM) +
set_bit(PGDAT_CONTENDED, + &NODE_DATA(target_nid)->flags); + putback_movable_pages(&demote_pages); list_splice(&ret_pages, &demote_pages); @@ -2597,6 +2620,19 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * scan target and the percentage scanning already complete */ lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; + + /* + * The shrink_page_list() may find the demote target node is + * contended, if so it doesn't make sense to scan anonymous + * LRU again. + * + * Need check if swap is available or not too since demotion + * may happen on swapless system. + */ + if (!is_demote_ok(pgdat->node_id, sc) && + (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0)) + lru = LRU_FILE; + nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); @@ -3447,6 +3483,7 @@ static void clear_pgdat_congested(pg_data_t *pgdat) clear_bit(PGDAT_CONGESTED, &pgdat->flags); clear_bit(PGDAT_DIRTY, &pgdat->flags); clear_bit(PGDAT_WRITEBACK, &pgdat->flags); + clear_bit(PGDAT_CONTENDED, &pgdat->flags); } /* -- 1.8.3.1