Modify vmscan to take into account the changed node-zone hierarchy.

With memory regions, a node's zones hang off per-node memory regions
instead of the single node_zones[] array, so every loop in vmscan that
walked pgdat->node_zones is converted to walk each region's instance of
the zone via for_each_mem_region_in_nid().  (An illustrative sketch of
the assumed layout is appended after the patch.)

Signed-off-by: Ankita Garg <ankita@xxxxxxxxxx>
---
 mm/vmscan.c |  284 ++++++++++++++++++++++++++++++++---------------------------
 1 files changed, 153 insertions(+), 131 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8bfd450..2e11974 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2235,10 +2235,16 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 						int classzone_idx)
 {
 	unsigned long present_pages = 0;
-	int i;
-
-	for (i = 0; i <= classzone_idx; i++)
-		present_pages += pgdat->node_zones[i].present_pages;
+	int i, p;
+
+	for (i = 0; i <= classzone_idx; i++) {
+		for_each_mem_region_in_nid(p, pgdat->node_id) {
+			mem_region_t *mem_region = &pgdat->mem_regions[p];
+			struct zone *zone = mem_region->zones + i;
+
+			present_pages += zone->present_pages;
+		}
+	}
 
 	return balanced_pages > (present_pages >> 2);
 }
@@ -2247,7 +2253,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
 					int classzone_idx)
 {
-	int i;
+	int i, j;
 	unsigned long balanced = 0;
 	bool all_zones_ok = true;
@@ -2257,29 +2263,31 @@
 	/* Check the watermark levels */
 	for (i = 0; i < pgdat->nr_zones; i++) {
-		struct zone *zone = pgdat->node_zones + i;
+		for_each_mem_region_in_nid(j, pgdat->node_id) {
+			mem_region_t *mem_region = &pgdat->mem_regions[j];
+			struct zone *zone = mem_region->zones + i;
 
-		if (!populated_zone(zone))
-			continue;
+			if (!populated_zone(zone))
+				continue;
 
-		/*
-		 * balance_pgdat() skips over all_unreclaimable after
-		 * DEF_PRIORITY. Effectively, it considers them balanced so
-		 * they must be considered balanced here as well if kswapd
-		 * is to sleep
-		 */
-		if (zone->all_unreclaimable) {
-			balanced += zone->present_pages;
-			continue;
-		}
+			/*
+			 * balance_pgdat() skips over all_unreclaimable after
+			 * DEF_PRIORITY. Effectively, it considers them balanced so
+			 * they must be considered balanced here as well if kswapd
+			 * is to sleep
+			 */
+			if (zone->all_unreclaimable) {
+				balanced += zone->present_pages;
+				continue;
+			}
 
-		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-							classzone_idx, 0))
-			all_zones_ok = false;
-		else
-			balanced += zone->present_pages;
+			if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
+								classzone_idx, 0))
+				all_zones_ok = false;
+			else
+				balanced += zone->present_pages;
+		}
 	}
-
 	/*
 	 * For high-order requests, the balanced zones must contain at least
 	 * 25% of the nodes pages for kswapd to sleep. For order-0, all zones
@@ -2318,7 +2326,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 	int all_zones_ok;
 	unsigned long balanced;
 	int priority;
-	int i;
+	int i, p;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2357,36 +2365,42 @@ loop_again:
 		 * zone which needs scanning
 		 */
 		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-			struct zone *zone = pgdat->node_zones + i;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
 
-			if (!populated_zone(zone))
-				continue;
+				if (!populated_zone(zone))
+					continue;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-				continue;
+				if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+					continue;
 
-			/*
-			 * Do some background aging of the anon list, to give
-			 * pages a chance to be referenced before reclaiming.
-			 */
-			if (inactive_anon_is_low(zone, &sc))
-				shrink_active_list(SWAP_CLUSTER_MAX, zone,
-							&sc, priority, 0);
-
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone), 0, 0)) {
-				end_zone = i;
-				*classzone_idx = i;
-				break;
+				/*
+				 * Do some background aging of the anon list, to give
+				 * pages a chance to be referenced before reclaiming.
+				 */
+				if (inactive_anon_is_low(zone, &sc))
+					shrink_active_list(SWAP_CLUSTER_MAX, zone,
+								&sc, priority, 0);
+
+				if (!zone_watermark_ok_safe(zone, order,
+						high_wmark_pages(zone), 0, 0)) {
+					end_zone = i;
+					*classzone_idx = i;
+					break;
+				}
 			}
 		}
 		if (i < 0)
 			goto out;
 
 		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
 
-			lru_pages += zone_reclaimable_pages(zone);
+				lru_pages += zone_reclaimable_pages(zone);
+			}
 		}
 
 		/*
@@ -2399,84 +2413,86 @@ loop_again:
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
-			int nr_slab;
-			unsigned long balance_gap;
-
-			if (!populated_zone(zone))
-				continue;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
+				int nr_slab;
+				unsigned long balance_gap;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-				continue;
+				if (!populated_zone(zone))
+					continue;
 
-			sc.nr_scanned = 0;
+				if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+					continue;
 
-			/*
-			 * Call soft limit reclaim before calling shrink_zone.
-			 * For now we ignore the return value
-			 */
-			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+				sc.nr_scanned = 0;
 
-			/*
-			 * We put equal pressure on every zone, unless
-			 * one zone has way too many pages free
-			 * already. The "too many pages" is defined
-			 * as the high wmark plus a "gap" where the
-			 * gap is either the low watermark or 1%
-			 * of the zone, whichever is smaller.
-			 */
-			balance_gap = min(low_wmark_pages(zone),
-				(zone->present_pages +
-					KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
-				KSWAPD_ZONE_BALANCE_GAP_RATIO);
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone) + balance_gap,
-					end_zone, 0))
-				shrink_zone(priority, zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
-						lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			total_scanned += sc.nr_scanned;
-
-			if (zone->all_unreclaimable)
-				continue;
-			if (nr_slab == 0 &&
-			    !zone_reclaimable(zone))
-				zone->all_unreclaimable = 1;
-			/*
-			 * If we've done a decent amount of scanning and
-			 * the reclaim ratio is low, start doing writepage
-			 * even in laptop mode
-			 */
-			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
-			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
-				sc.may_writepage = 1;
+				/*
+				 * Call soft limit reclaim before calling shrink_zone.
+				 * For now we ignore the return value
+				 */
+				mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
 
-			if (!zone_watermark_ok_safe(zone, order,
-					high_wmark_pages(zone), end_zone, 0)) {
-				all_zones_ok = 0;
 				/*
-				 * We are still under min water mark. This
-				 * means that we have a GFP_ATOMIC allocation
-				 * failure risk. Hurry up!
+				 * We put equal pressure on every zone, unless
+				 * one zone has way too many pages free
+				 * already. The "too many pages" is defined
+				 * as the high wmark plus a "gap" where the
+				 * gap is either the low watermark or 1%
+				 * of the zone, whichever is smaller.
 				 */
+				balance_gap = min(low_wmark_pages(zone),
+					(zone->present_pages +
+						KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+					KSWAPD_ZONE_BALANCE_GAP_RATIO);
 				if (!zone_watermark_ok_safe(zone, order,
-						min_wmark_pages(zone), end_zone, 0))
-					has_under_min_watermark_zone = 1;
-			} else {
+						high_wmark_pages(zone) + balance_gap,
+						end_zone, 0))
+					shrink_zone(priority, zone, &sc);
+				reclaim_state->reclaimed_slab = 0;
+				nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
+							lru_pages);
+				sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+				total_scanned += sc.nr_scanned;
+
+				if (zone->all_unreclaimable)
+					continue;
+				if (nr_slab == 0 &&
+				    !zone_reclaimable(zone))
+					zone->all_unreclaimable = 1;
 				/*
-				 * If a zone reaches its high watermark,
-				 * consider it to be no longer congested. It's
-				 * possible there are dirty pages backed by
-				 * congested BDIs but as pressure is relieved,
-				 * spectulatively avoid congestion waits
+				 * If we've done a decent amount of scanning and
+				 * the reclaim ratio is low, start doing writepage
+				 * even in laptop mode
 				 */
-				zone_clear_flag(zone, ZONE_CONGESTED);
-				if (i <= *classzone_idx)
-					balanced += zone->present_pages;
-			}
+				if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
+				    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
+					sc.may_writepage = 1;
+				if (!zone_watermark_ok_safe(zone, order,
+					high_wmark_pages(zone), end_zone, 0)) {
+					all_zones_ok = 0;
+					/*
+					 * We are still under min water mark. This
+					 * means that we have a GFP_ATOMIC allocation
+					 * failure risk. Hurry up!
+					 */
+					if (!zone_watermark_ok_safe(zone, order,
+						min_wmark_pages(zone), end_zone, 0))
+						has_under_min_watermark_zone = 1;
+				} else {
+					/*
+					 * If a zone reaches its high watermark,
+					 * consider it to be no longer congested. It's
+					 * possible there are dirty pages backed by
+					 * congested BDIs but as pressure is relieved,
+					 * spectulatively avoid congestion waits
+					 */
+					zone_clear_flag(zone, ZONE_CONGESTED);
+					if (i <= *classzone_idx)
+						balanced += zone->present_pages;
+				}
+			}
 		}
 		if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
 			break;		/* kswapd: all done */
@@ -2542,23 +2558,26 @@ out:
 	 */
 	if (order) {
 		for (i = 0; i <= end_zone; i++) {
-			struct zone *zone = pgdat->node_zones + i;
+			for_each_mem_region_in_nid(p, pgdat->node_id) {
+				mem_region_t *mem_region = &pgdat->mem_regions[p];
+				struct zone *zone = mem_region->zones + i;
 
-			if (!populated_zone(zone))
-				continue;
+				if (!populated_zone(zone))
+					continue;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-				continue;
+				if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+					continue;
 
-			/* Confirm the zone is balanced for order-0 */
-			if (!zone_watermark_ok(zone, 0,
-					high_wmark_pages(zone), 0, 0)) {
-				order = sc.order = 0;
-				goto loop_again;
-			}
+				/* Confirm the zone is balanced for order-0 */
+				if (!zone_watermark_ok(zone, 0,
+						high_wmark_pages(zone), 0, 0)) {
+					order = sc.order = 0;
+					goto loop_again;
+				}
 
-			/* If balanced, clear the congested flag */
-			zone_clear_flag(zone, ZONE_CONGESTED);
+				/* If balanced, clear the congested flag */
+				zone_clear_flag(zone, ZONE_CONGESTED);
+			}
 		}
 	}
@@ -3304,18 +3323,21 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev,
 					   struct sysdev_attribute *attr,
 					   const char *buf, size_t count)
 {
-	struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
-	struct zone *zone;
 	unsigned long res;
+	int i, j;
 	unsigned long req = strict_strtoul(buf, 10, &res);
 
 	if (!req)
 		return 1;	/* zero is no-op */
 
-	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!populated_zone(zone))
-			continue;
-		scan_zone_unevictable_pages(zone);
+	for (j = 0; j < MAX_NR_ZONES; ++j) {
+		for_each_mem_region_in_nid(i, dev->id) {
+			mem_region_t *mem_region = &(NODE_DATA(dev->id)->mem_regions[i]);
+			struct zone *zone = mem_region->zones + j;
+			if (!populated_zone(zone))
+				continue;
+			scan_zone_unevictable_pages(zone);
+		}
 	}
 	return 1;
 }
-- 
1.7.4
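For readers following along without the rest of the series: mem_region_t,
pg_data_t::mem_regions[] and the for_each_mem_region_in_nid() iterator used
throughout the patch are introduced by earlier patches that are not part of
this mail.  The user-space sketch below is only a guess at their shape, made
to show how the old pgdat->node_zones walks turn into per-region walks; the
struct layouts, field names and the iterator definition here are assumptions
for illustration, not the actual kernel code.

/*
 * Hypothetical user-space model of the node -> memory-region -> zone
 * hierarchy assumed by the patch above.  All definitions are illustrative
 * guesses; only the traversal pattern mirrors the patch.
 */
#include <stdio.h>

#define MAX_NR_ZONES	4	/* assumed number of zones per region */
#define MAX_NR_REGIONS	2	/* assumed number of regions per node */

struct zone {
	unsigned long present_pages;
};

typedef struct mem_region {
	struct zone zones[MAX_NR_ZONES];	/* each region carries its own zones */
} mem_region_t;

typedef struct pglist_data {
	int node_id;
	int nr_mem_regions;
	int nr_zones;
	mem_region_t mem_regions[MAX_NR_REGIONS];
} pg_data_t;

static pg_data_t nodes[1];		/* stand-in for NODE_DATA() */

/* Assumed shape of the iterator used throughout the patch. */
#define for_each_mem_region_in_nid(p, nid) \
	for ((p) = 0; (p) < nodes[(nid)].nr_mem_regions; (p)++)

/*
 * The old code summed pgdat->node_zones[i].present_pages directly; the new
 * code visits zone i in every region of the node, as in pgdat_balanced().
 */
static unsigned long present_pages_upto(pg_data_t *pgdat, int classzone_idx)
{
	unsigned long present_pages = 0;
	int i, p;

	for (i = 0; i <= classzone_idx; i++) {
		for_each_mem_region_in_nid(p, pgdat->node_id) {
			mem_region_t *mem_region = &pgdat->mem_regions[p];
			struct zone *zone = mem_region->zones + i;

			present_pages += zone->present_pages;
		}
	}
	return present_pages;
}

int main(void)
{
	pg_data_t *pgdat = &nodes[0];

	pgdat->node_id = 0;
	pgdat->nr_mem_regions = 2;
	pgdat->nr_zones = 2;
	pgdat->mem_regions[0].zones[0].present_pages = 1000;
	pgdat->mem_regions[0].zones[1].present_pages = 3000;
	pgdat->mem_regions[1].zones[0].present_pages = 500;
	pgdat->mem_regions[1].zones[1].present_pages = 1500;

	printf("present pages up to zone 1: %lu\n",
	       present_pages_upto(pgdat, 1));
	return 0;
}

Built with gcc and run, this prints the pages summed across both regions
(6000 here), which is what the reworked pgdat_balanced() loop computes per
node once zones are split across regions.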