From: Ankita Garg <gargankita@xxxxxxxxx> Modify vmscan to take into account the changed node-zone hierarchy. Signed-off-by: Ankita Garg <gargankita@xxxxxxxxx> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@xxxxxxxxxxxxxxxxxx> --- mm/vmscan.c | 364 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 193 insertions(+), 171 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 2624edc..4d8f303 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2209,11 +2209,14 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) unsigned long free_pages = 0; int i; bool wmark_ok; + struct mem_region *region; for (i = 0; i <= ZONE_NORMAL; i++) { - zone = &pgdat->node_zones[i]; - pfmemalloc_reserve += min_wmark_pages(zone); - free_pages += zone_page_state(zone, NR_FREE_PAGES); + for_each_mem_region_in_node(region, pgdat->node_id) { + zone = ®ion->region_zones[i]; + pfmemalloc_reserve += min_wmark_pages(zone); + free_pages += zone_page_state(zone, NR_FREE_PAGES); + } } wmark_ok = free_pages > pfmemalloc_reserve / 2; @@ -2442,10 +2445,16 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages, int classzone_idx) { unsigned long present_pages = 0; + struct mem_region *region; int i; - for (i = 0; i <= classzone_idx; i++) - present_pages += pgdat->node_zones[i].present_pages; + for (i = 0; i <= classzone_idx; i++) { + for_each_mem_region_in_node(region, pgdat->node_id) { + struct zone *zone = region->region_zones + i; + + present_pages += zone->present_pages; + } + } /* A special case here: if zone has no page, we think it's balanced */ return balanced_pages >= (present_pages >> 2); @@ -2463,6 +2472,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, int i; unsigned long balanced = 0; bool all_zones_ok = true; + struct mem_region *region; /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ if (remaining) @@ -2484,27 +2494,29 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, /* Check the watermark levels */ for (i = 0; i <= classzone_idx; i++) { - struct zone *zone = pgdat->node_zones + i; + for_each_mem_region_in_node(region, pgdat->node_id) { + struct zone *zone = region->region_zones + i; - if (!populated_zone(zone)) - continue; + if (!populated_zone(zone)) + continue; - /* - * balance_pgdat() skips over all_unreclaimable after - * DEF_PRIORITY. Effectively, it considers them balanced so - * they must be considered balanced here as well if kswapd - * is to sleep - */ - if (zone->all_unreclaimable) { - balanced += zone->present_pages; - continue; - } + /* + * balance_pgdat() skips over all_unreclaimable after + * DEF_PRIORITY. 
Effectively, it considers them balanced so + * they must be considered balanced here as well if kswapd + * is to sleep + */ + if (zone->all_unreclaimable) { + balanced += zone->present_pages; + continue; + } - if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), - i, 0)) - all_zones_ok = false; - else - balanced += zone->present_pages; + if (!zone_watermark_ok_safe(zone, order, + high_wmark_pages(zone), i, 0)) + all_zones_ok = false; + else + balanced += zone->present_pages; + } } /* @@ -2565,6 +2577,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, }; + struct mem_region *region; loop_again: total_scanned = 0; sc.priority = DEF_PRIORITY; @@ -2583,49 +2596,55 @@ loop_again: * Scan in the highmem->dma direction for the highest * zone which needs scanning */ - for (i = pgdat->nr_zones - 1; i >= 0; i--) { - struct zone *zone = pgdat->node_zones + i; + for (i = pgdat->nr_node_zone_types - 1; i >= 0; i--) { + for_each_mem_region_in_node(region, pgdat->node_id) { + struct zone *zone = region->region_zones + i; - if (!populated_zone(zone)) - continue; + if (!populated_zone(zone)) + continue; - if (zone->all_unreclaimable && - sc.priority != DEF_PRIORITY) - continue; + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) + continue; - /* - * Do some background aging of the anon list, to give - * pages a chance to be referenced before reclaiming. - */ - age_active_anon(zone, &sc); + /* + * Do some background aging of the anon list, to give + * pages a chance to be referenced before reclaiming. + */ + age_active_anon(zone, &sc); - /* - * If the number of buffer_heads in the machine - * exceeds the maximum allowed level and this node - * has a highmem zone, force kswapd to reclaim from - * it to relieve lowmem pressure. - */ - if (buffer_heads_over_limit && is_highmem_idx(i)) { - end_zone = i; - break; - } + /* + * If the number of buffer_heads in the machine + * exceeds the maximum allowed level and this node + * has a highmem zone, force kswapd to reclaim from + * it to relieve lowmem pressure. + */ + if (buffer_heads_over_limit && is_highmem_idx(i)) { + end_zone = i; + goto out_loop; + } - if (!zone_watermark_ok_safe(zone, order, - high_wmark_pages(zone), 0, 0)) { - end_zone = i; - break; - } else { - /* If balanced, clear the congested flag */ - zone_clear_flag(zone, ZONE_CONGESTED); + if (!zone_watermark_ok_safe(zone, order, + high_wmark_pages(zone), 0, 0)) { + end_zone = i; + goto out_loop; + } else { + /* If balanced, clear the congested flag */ + zone_clear_flag(zone, ZONE_CONGESTED); + } } } + + out_loop: if (i < 0) goto out; for (i = 0; i <= end_zone; i++) { - struct zone *zone = pgdat->node_zones + i; + for_each_mem_region_in_node(region, pgdat->node_id) { + struct zone *zone = region->region_zones + i; - lru_pages += zone_reclaimable_pages(zone); + lru_pages += zone_reclaimable_pages(zone); + } } /* @@ -2638,108 +2657,109 @@ loop_again: * cause too much scanning of the lower zones. 
*/ for (i = 0; i <= end_zone; i++) { - struct zone *zone = pgdat->node_zones + i; - int nr_slab, testorder; - unsigned long balance_gap; - - if (!populated_zone(zone)) - continue; + for_each_mem_region_in_node(region, pgdat->node_id) { + struct zone *zone = region->region_zones + i; + int nr_slab, testorder; + unsigned long balance_gap; - if (zone->all_unreclaimable && - sc.priority != DEF_PRIORITY) - continue; - - sc.nr_scanned = 0; - - nr_soft_scanned = 0; - /* - * Call soft limit reclaim before calling shrink_zone. - */ - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - order, sc.gfp_mask, - &nr_soft_scanned); - sc.nr_reclaimed += nr_soft_reclaimed; - total_scanned += nr_soft_scanned; - - /* - * We put equal pressure on every zone, unless - * one zone has way too many pages free - * already. The "too many pages" is defined - * as the high wmark plus a "gap" where the - * gap is either the low watermark or 1% - * of the zone, whichever is smaller. - */ - balance_gap = min(low_wmark_pages(zone), - (zone->present_pages + - KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / - KSWAPD_ZONE_BALANCE_GAP_RATIO); - /* - * Kswapd reclaims only single pages with compaction - * enabled. Trying too hard to reclaim until contiguous - * free pages have become available can hurt performance - * by evicting too much useful data from memory. - * Do not reclaim more than needed for compaction. - */ - testorder = order; - if (COMPACTION_BUILD && order && - compaction_suitable(zone, order) != - COMPACT_SKIPPED) - testorder = 0; - - if ((buffer_heads_over_limit && is_highmem_idx(i)) || - !zone_watermark_ok_safe(zone, testorder, - high_wmark_pages(zone) + balance_gap, - end_zone, 0)) { - shrink_zone(zone, &sc); - - reclaim_state->reclaimed_slab = 0; - nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); - sc.nr_reclaimed += reclaim_state->reclaimed_slab; - total_scanned += sc.nr_scanned; + if (!populated_zone(zone)) + continue; - if (nr_slab == 0 && !zone_reclaimable(zone)) - zone->all_unreclaimable = 1; - } + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) + continue; - /* - * If we've done a decent amount of scanning and - * the reclaim ratio is low, start doing writepage - * even in laptop mode - */ - if (total_scanned > SWAP_CLUSTER_MAX * 2 && - total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) - sc.may_writepage = 1; + sc.nr_scanned = 0; - if (zone->all_unreclaimable) { - if (end_zone && end_zone == i) - end_zone--; - continue; - } + nr_soft_scanned = 0; + /* + * Call soft limit reclaim before calling shrink_zone. + */ + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + order, sc.gfp_mask, + &nr_soft_scanned); + sc.nr_reclaimed += nr_soft_reclaimed; + total_scanned += nr_soft_scanned; - if (!zone_watermark_ok_safe(zone, testorder, - high_wmark_pages(zone), end_zone, 0)) { - all_zones_ok = 0; /* - * We are still under min water mark. This - * means that we have a GFP_ATOMIC allocation - * failure risk. Hurry up! + * We put equal pressure on every zone, unless + * one zone has way too many pages free + * already. The "too many pages" is defined + * as the high wmark plus a "gap" where the + * gap is either the low watermark or 1% + * of the zone, whichever is smaller. 
*/ - if (!zone_watermark_ok_safe(zone, order, - min_wmark_pages(zone), end_zone, 0)) - has_under_min_watermark_zone = 1; - } else { + balance_gap = min(low_wmark_pages(zone), + (zone->present_pages + + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / + KSWAPD_ZONE_BALANCE_GAP_RATIO); /* - * If a zone reaches its high watermark, - * consider it to be no longer congested. It's - * possible there are dirty pages backed by - * congested BDIs but as pressure is relieved, - * speculatively avoid congestion waits + * Kswapd reclaims only single pages with compaction + * enabled. Trying too hard to reclaim until contiguous + * free pages have become available can hurt performance + * by evicting too much useful data from memory. + * Do not reclaim more than needed for compaction. */ - zone_clear_flag(zone, ZONE_CONGESTED); - if (i <= *classzone_idx) - balanced += zone->present_pages; - } + testorder = order; + if (COMPACTION_BUILD && order && + compaction_suitable(zone, order) != + COMPACT_SKIPPED) + testorder = 0; + + if ((buffer_heads_over_limit && is_highmem_idx(i)) || + !zone_watermark_ok_safe(zone, testorder, + high_wmark_pages(zone) + balance_gap, + end_zone, 0)) { + shrink_zone(zone, &sc); + + reclaim_state->reclaimed_slab = 0; + nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); + sc.nr_reclaimed += reclaim_state->reclaimed_slab; + total_scanned += sc.nr_scanned; + + if (nr_slab == 0 && !zone_reclaimable(zone)) + zone->all_unreclaimable = 1; + } + /* + * If we've done a decent amount of scanning and + * the reclaim ratio is low, start doing writepage + * even in laptop mode + */ + if (total_scanned > SWAP_CLUSTER_MAX * 2 && + total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) + sc.may_writepage = 1; + + if (zone->all_unreclaimable) { + if (end_zone && end_zone == i) + end_zone--; + continue; + } + + if (!zone_watermark_ok_safe(zone, testorder, + high_wmark_pages(zone), end_zone, 0)) { + all_zones_ok = 0; + /* + * We are still under min water mark. This + * means that we have a GFP_ATOMIC allocation + * failure risk. Hurry up! + */ + if (!zone_watermark_ok_safe(zone, order, + min_wmark_pages(zone), end_zone, 0)) + has_under_min_watermark_zone = 1; + } else { + /* + * If a zone reaches its high watermark, + * consider it to be no longer congested. It's + * possible there are dirty pages backed by + * congested BDIs but as pressure is relieved, + * speculatively avoid congestion waits + */ + zone_clear_flag(zone, ZONE_CONGESTED); + if (i <= *classzone_idx) + balanced += zone->present_pages; + } + } } /* @@ -2817,34 +2837,36 @@ out: int zones_need_compaction = 1; for (i = 0; i <= end_zone; i++) { - struct zone *zone = pgdat->node_zones + i; + for_each_mem_region_in_node(region, pgdat->node_id) { + struct zone *zone = region->region_zones + i; - if (!populated_zone(zone)) - continue; + if (!populated_zone(zone)) + continue; - if (zone->all_unreclaimable && - sc.priority != DEF_PRIORITY) - continue; + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) + continue; - /* Would compaction fail due to lack of free memory? */ - if (COMPACTION_BUILD && - compaction_suitable(zone, order) == COMPACT_SKIPPED) - goto loop_again; + /* Would compaction fail due to lack of free memory? 
*/ + if (COMPACTION_BUILD && + compaction_suitable(zone, order) == COMPACT_SKIPPED) + goto loop_again; - /* Confirm the zone is balanced for order-0 */ - if (!zone_watermark_ok(zone, 0, - high_wmark_pages(zone), 0, 0)) { - order = sc.order = 0; - goto loop_again; - } + /* Confirm the zone is balanced for order-0 */ + if (!zone_watermark_ok(zone, 0, + high_wmark_pages(zone), 0, 0)) { + order = sc.order = 0; + goto loop_again; + } - /* Check if the memory needs to be defragmented. */ - if (zone_watermark_ok(zone, order, - low_wmark_pages(zone), *classzone_idx, 0)) - zones_need_compaction = 0; + /* Check if the memory needs to be defragmented. */ + if (zone_watermark_ok(zone, order, + low_wmark_pages(zone), *classzone_idx, 0)) + zones_need_compaction = 0; - /* If balanced, clear the congested flag */ - zone_clear_flag(zone, ZONE_CONGESTED); + /* If balanced, clear the congested flag */ + zone_clear_flag(zone, ZONE_CONGESTED); + } } if (zones_need_compaction) @@ -2966,7 +2988,7 @@ static int kswapd(void *p) order = new_order = 0; balanced_order = 0; - classzone_idx = new_classzone_idx = pgdat->nr_zones - 1; + classzone_idx = new_classzone_idx = pgdat->nr_node_zone_types - 1; balanced_classzone_idx = classzone_idx; for ( ; ; ) { int ret; @@ -2981,7 +3003,7 @@ static int kswapd(void *p) new_order = pgdat->kswapd_max_order; new_classzone_idx = pgdat->classzone_idx; pgdat->kswapd_max_order = 0; - pgdat->classzone_idx = pgdat->nr_zones - 1; + pgdat->classzone_idx = pgdat->nr_node_zone_types - 1; } if (order < new_order || classzone_idx > new_classzone_idx) { @@ -2999,7 +3021,7 @@ static int kswapd(void *p) new_order = order; new_classzone_idx = classzone_idx; pgdat->kswapd_max_order = 0; - pgdat->classzone_idx = pgdat->nr_zones - 1; + pgdat->classzone_idx = pgdat->nr_node_zone_types - 1; } ret = try_to_freeze(); -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>
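Note for readers who do not have the earlier patches of this series at hand: below is a minimal, compilable user-space sketch of the iteration pattern this patch applies throughout vmscan.c, namely that every place which used to index pgdat->node_zones[i] now visits that zone type once per memory region via for_each_mem_region_in_node(). The struct layout, the NODE_DATA() lookup and the field/helper names in the sketch are simplified stand-ins, not the definitions introduced by this series; only the shape of the loops is meant to match (modelled here on the pfmemalloc_watermark_ok() hunk).

/*
 * Not part of the patch: a toy model of the node -> memory-region -> zone
 * walk.  Everything below is a simplified stand-in (NODE_DATA(), field
 * names, watermark numbers); only the loop structure mirrors the real code.
 */
#include <stdbool.h>
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, NR_NODE_ZONE_TYPES };

#define MAX_NR_REGIONS	4

struct zone {
	unsigned long watermark_min;	/* stand-in for min_wmark_pages()   */
	unsigned long nr_free_pages;	/* stand-in for NR_FREE_PAGES state */
	unsigned long present_pages;
};

struct mem_region {
	struct zone region_zones[NR_NODE_ZONE_TYPES];
};

typedef struct pglist_data {
	int node_id;
	int nr_node_regions;
	int nr_node_zone_types;
	struct mem_region node_regions[MAX_NR_REGIONS];
} pg_data_t;

static pg_data_t toy_nodes[1];
#define NODE_DATA(nid)	(&toy_nodes[(nid)])

/* Walk every memory region belonging to node 'nid'. */
#define for_each_mem_region_in_node(region, nid)			\
	for ((region) = &NODE_DATA(nid)->node_regions[0];		\
	     (region) < &NODE_DATA(nid)->node_regions[NODE_DATA(nid)->nr_node_regions]; \
	     (region)++)

/*
 * Same structure as the pfmemalloc_watermark_ok() hunk: the per-zone-type
 * loop gains an inner per-region loop, and reserves/free pages are summed
 * across all regions of the node before the watermark check.
 */
static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
{
	unsigned long pfmemalloc_reserve = 0;
	unsigned long free_pages = 0;
	struct mem_region *region;
	int i;

	for (i = 0; i <= ZONE_NORMAL; i++) {
		for_each_mem_region_in_node(region, pgdat->node_id) {
			struct zone *zone = &region->region_zones[i];

			pfmemalloc_reserve += zone->watermark_min;
			free_pages += zone->nr_free_pages;
		}
	}

	return free_pages > pfmemalloc_reserve / 2;
}

int main(void)
{
	pg_data_t *pgdat = NODE_DATA(0);
	struct mem_region *region;
	int i;

	pgdat->node_id = 0;
	pgdat->nr_node_regions = 2;
	pgdat->nr_node_zone_types = NR_NODE_ZONE_TYPES;

	/* Made-up per-zone numbers, just to exercise the walk. */
	for_each_mem_region_in_node(region, pgdat->node_id) {
		for (i = 0; i < NR_NODE_ZONE_TYPES; i++) {
			region->region_zones[i].watermark_min = 128;
			region->region_zones[i].nr_free_pages = 1024;
			region->region_zones[i].present_pages = 4096;
		}
	}

	printf("pfmemalloc watermark ok: %s\n",
	       pfmemalloc_watermark_ok(pgdat) ? "yes" : "no");
	return 0;
}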
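One side effect of the extra nesting is visible in the balance_pgdat() hunks above: a break inside the new inner region walk would only terminate that walk, not the surrounding zone-index loop, which is why the patch replaces the old breaks with a goto to the new out_loop label.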