From: Andrea Arcangeli <aarcange@xxxxxxxxxx> This takes advantage of memory compaction to properly generate pages of order > 0 if regular page reclaim fails and priority level becomes more severe and we don't reach the proper watermarks. Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx> --- diff --git a/include/linux/compaction.h b/include/linux/compaction.h --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -11,6 +11,9 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 +#define COMPACT_MODE_DIRECT_RECLAIM 0 +#define COMPACT_MODE_KSWAPD 1 + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -20,6 +23,9 @@ extern int sysctl_extfrag_handler(struct void __user *buffer, size_t *length, loff_t *ppos); extern int fragmentation_index(struct zone *zone, unsigned int order); +extern unsigned long compact_zone_order(struct zone *zone, + int order, gfp_t gfp_mask, + int compact_mode); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); @@ -59,6 +65,13 @@ static inline unsigned long try_to_compa return COMPACT_CONTINUE; } +static inline unsigned long compact_zone_order(struct zone *zone, + int order, gfp_t gfp_mask, + int compact_mode) +{ + return COMPACT_CONTINUE; +} + static inline void defer_compaction(struct zone *zone) { } diff --git a/mm/compaction.c b/mm/compaction.c --- a/mm/compaction.c +++ b/mm/compaction.c @@ -38,6 +38,8 @@ struct compact_control { unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; + + int compact_mode; }; static unsigned long release_freepages(struct list_head *freelist) @@ -357,10 +359,10 @@ static void update_nr_listpages(struct c } static int compact_finished(struct zone *zone, - struct compact_control *cc) + struct compact_control *cc) { unsigned int order; - unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order); + unsigned long watermark; if (fatal_signal_pending(current)) return COMPACT_PARTIAL; @@ -370,12 +372,27 @@ static int compact_finished(struct zone return COMPACT_COMPLETE; /* Compaction run is not finished if the watermark is not met */ + if (cc->compact_mode != COMPACT_MODE_KSWAPD) + watermark = low_wmark_pages(zone); + else + watermark = high_wmark_pages(zone); + watermark += (1 << cc->order); + if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) return COMPACT_CONTINUE; if (cc->order == -1) return COMPACT_CONTINUE; + /* + * Generating only one page of the right order is not enough + * for kswapd, we must continue until we're above the high + * watermark as a pool for high order GFP_ATOMIC allocations + * too. + */ + if (cc->compact_mode == COMPACT_MODE_KSWAPD) + return COMPACT_CONTINUE; + /* Direct compactor: Is a suitable page free? */ for (order = cc->order; order < MAX_ORDER; order++) { /* Job done if page is free of the right migratetype */ @@ -433,8 +450,9 @@ static int compact_zone(struct zone *zon return ret; } -static unsigned long compact_zone_order(struct zone *zone, - int order, gfp_t gfp_mask) +unsigned long compact_zone_order(struct zone *zone, + int order, gfp_t gfp_mask, + int compact_mode) { struct compact_control cc = { .nr_freepages = 0, @@ -442,6 +460,7 @@ static unsigned long compact_zone_order( .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, + .compact_mode = compact_mode, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -476,7 +495,7 @@ unsigned long try_to_compact_pages(struc * made because an assumption is made that the page allocator can satisfy * the "cheaper" orders without taking special steps */ - if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io) + if (!order || !may_enter_fs || !may_perform_io) return rc; count_vm_event(COMPACTSTALL); @@ -517,7 +536,8 @@ unsigned long try_to_compact_pages(struc break; } - status = compact_zone_order(zone, order, gfp_mask); + status = compact_zone_order(zone, order, gfp_mask, + COMPACT_MODE_DIRECT_RECLAIM); rc = max(status, rc); if (zone_watermark_ok(zone, order, watermark, 0, 0)) @@ -547,6 +567,7 @@ static int compact_node(int nid) .nr_freepages = 0, .nr_migratepages = 0, .order = -1, + .compact_mode = COMPACT_MODE_DIRECT_RECLAIM, }; zone = &pgdat->node_zones[zoneid]; diff --git a/mm/vmscan.c b/mm/vmscan.c --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -40,6 +40,7 @@ #include <linux/memcontrol.h> #include <linux/delayacct.h> #include <linux/sysctl.h> +#include <linux/compaction.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -2254,6 +2255,7 @@ loop_again: * cause too much scanning of the lower zones. */ for (i = 0; i <= end_zone; i++) { + int compaction; struct zone *zone = pgdat->node_zones + i; int nr_slab; @@ -2283,9 +2285,26 @@ loop_again: lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; + + compaction = 0; + if (order && + zone_watermark_ok(zone, 0, + high_wmark_pages(zone), + end_zone, 0) && + !zone_watermark_ok(zone, order, + high_wmark_pages(zone), + end_zone, 0)) { + compact_zone_order(zone, + order, + sc.gfp_mask, + COMPACT_MODE_KSWAPD); + compaction = 1; + } + if (zone->all_unreclaimable) continue; - if (nr_slab == 0 && !zone_reclaimable(zone)) + if (!compaction && nr_slab == 0 && + !zone_reclaimable(zone)) zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxxx For more info on Linux MM, see: http://www.linux-mm.org/ . Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/ Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>