From: Keith Busch <kbusch@xxxxxxxxxx> Reclaim anonymous pages if a migration path is available now that demotion provides a non-swap recourse for reclaiming anon pages. Note that this check is subtly different from the anon_should_be_aged() checks. This mechanism checks whether a specific page in a specific context *can* actually be reclaimed, given current swap space and cgroup limits anon_should_be_aged() is a much simpler and more preliminary check which just says whether there is a possibility of future reclaim. Cc: Keith Busch <kbusch@xxxxxxxxxx> Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx> Reviewed-by: Yang Shi <shy828301@xxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Zi Yan <ziy@xxxxxxxxxx> Cc: Wei Xu <weixugc@xxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> -- Changes since 20210618: * Consider whether demotion is disabled Changes from Dave 202010: * remove 'total_swap_pages' modification Changes from Dave 202006: * rename reclaim_anon_pages()->can_reclaim_anon_pages() Note: Keith's Intel SoB is commented out because he is no longer at Intel and his @intel.com mail will bounce. --- mm/vmscan.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 55f6192b2a51..fce43c7970d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -519,6 +519,36 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); } +static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg, + int node_id, + struct scan_control *sc) +{ + if (memcg == NULL) { + /* + * For non-memcg reclaim, is there + * space in any swap device? + */ + if (get_nr_swap_pages() > 0) + return true; + } else { + /* Is the memcg below its swap limit? */ + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) + return true; + } + + /* + * The page can not be swapped. + * + * Can it be reclaimed from this node via demotion? + */ + if ((!sc || !sc->no_demotion) && + next_demotion_node(node_id) != NUMA_NO_NODE) + return true; + + /* No way to reclaim anon pages */ + return false; +} + /* * This misses isolated pages which are not accounted for to save counters. * As the data only determines if reclaim or compaction continues, it is @@ -530,7 +560,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone) nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE); - if (get_nr_swap_pages() > 0) + if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL)) nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON); @@ -2531,6 +2561,7 @@ enum scan_balance { static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long *nr) { + struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); unsigned long anon_cost, file_cost, total_cost; int swappiness = mem_cgroup_swappiness(memcg); @@ -2541,7 +2572,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru; /* If we have no swap space, do not bother scanning anon pages. */ - if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { + if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) { scan_balance = SCAN_FILE; goto out; } @@ -2916,7 +2947,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, */ pages_for_compaction = compact_gap(sc->order); inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); - if (get_nr_swap_pages() > 0) + if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); return inactive_lru_pages > pages_for_compaction; -- 2.30.2