kswapd is woken when zones are below the low watermark but the wakeup decision is not taking the classzone into account. Now that reclaim is node-based, it is only required to wake kswapd once per node and only if all zones are unbalanced for the requested classzone. Note that one node might be checked multiple times if the zonelist is ordered by node because there is no cheap way of tracking what nodes have already been visited. For zone-ordering, each node should be checked only once. Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Acked-by: Vlastimil Babka <vbabka@xxxxxxx> --- mm/page_alloc.c | 8 ++++++-- mm/vmscan.c | 13 +++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd34b305c8ee..bb261885c121 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3410,10 +3410,14 @@ static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac) { struct zoneref *z; struct zone *zone; + pg_data_t *last_pgdat = NULL; for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, - ac->high_zoneidx, ac->nodemask) - wakeup_kswapd(zone, order, ac_classzone_idx(ac)); + ac->high_zoneidx, ac->nodemask) { + if (last_pgdat != zone->zone_pgdat) + wakeup_kswapd(zone, order, ac_classzone_idx(ac)); + last_pgdat = zone->zone_pgdat; + } } static inline unsigned int diff --git a/mm/vmscan.c b/mm/vmscan.c index 5ad670881d8d..cc820bbe9c01 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3419,6 +3419,7 @@ static int kswapd(void *p) void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) { pg_data_t *pgdat; + int z; if (!populated_zone(zone)) return; @@ -3430,8 +3431,16 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) pgdat->kswapd_order = max(pgdat->kswapd_order, order); if (!waitqueue_active(&pgdat->kswapd_wait)) return; - if (zone_balanced(zone, order, 0)) - return; + + /* Only wake kswapd if all zones are unbalanced */ + for (z = 0; z <= classzone_idx; z++) { + zone = pgdat->node_zones + z; + if (!populated_zone(zone)) + continue; + + if (zone_balanced(zone, order, classzone_idx)) + return; + } trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order); wake_up_interruptible(&pgdat->kswapd_wait); -- 2.6.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>