On Tue, Sep 04, 2012 at 03:26:25PM +0200, Bartlomiej Zolnierkiewicz wrote:
> Pass GFP flags to [__]zone_watermark_ok() and use them to account
> free CMA pages only when necessary (there is no need to check
> watermark against only non-CMA free pages for movable allocations).

I want to make this zero-overhead in the !CONFIG_CMA case. We can reduce
the number of zone_watermark_ok() arguments, and with !CONFIG_CMA the
overhead would be zero. How about this?

(The diff below only shows the *concept* - it is NOT complete and NOT
compile tested.)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 009ac28..61c592a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1514,6 +1514,8 @@ failed:
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
 
+#define ALLOC_CMA		0x80
+
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
 static struct {
@@ -1608,7 +1610,10 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
-
+#ifdef CONFIG_CMA
+	if (alloc_flags & ALLOC_CMA)
+		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
 	if (free_pages <= min + lowmem_reserve)
 		return false;
 	for (o = 0; o < order; o++) {
@@ -2303,7 +2308,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 			 unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
-
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	return alloc_flags;
 }
 
@@ -2533,6 +2541,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2561,9 +2570,13 @@ retry_cpuset:
 	if (!preferred_zone)
 		goto out;
 
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
-			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
+			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page))
 		page = __alloc_pages_slowpath(gfp_mask, order,

> 
> Cc: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx>
> Cc: Michal Nazarewicz <mina86@xxxxxxxxxx>
> Cc: Minchan Kim <minchan@xxxxxxxxxx>
> Cc: Mel Gorman <mgorman@xxxxxxx>
> Cc: Hugh Dickins <hughd@xxxxxxxxxx>
> Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@xxxxxxxxxxx>
> Signed-off-by: Kyungmin Park <kyungmin.park@xxxxxxxxxxx>
> ---
>  include/linux/mmzone.h |  2 +-
>  mm/compaction.c        | 11 ++++++-----
>  mm/page_alloc.c        | 29 +++++++++++++++++++----------
>  mm/vmscan.c            |  4 ++--
>  4 files changed, 28 insertions(+), 18 deletions(-)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 904889d..308bb91 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -725,7 +725,7 @@ extern struct mutex zonelists_mutex;
>  void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
>  void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
>  bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> -		int classzone_idx, int alloc_flags);
> +		int classzone_idx, int alloc_flags, gfp_t gfp_flags);
>  bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
>  		int classzone_idx, int alloc_flags);
>  enum memmap_context {
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 4b902aa..080175a 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -684,7 +684,7 @@ static int compact_finished(struct zone *zone,
>  	watermark = low_wmark_pages(zone);
>  	watermark += (1 << cc->order);
> 
> -	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
> +	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0, 0))
>  		return COMPACT_CONTINUE;
> 
>  	/* Direct compactor: Is a suitable page free? */
> @@ -726,7 +726,7 @@ unsigned long compaction_suitable(struct zone *zone, int order)
>  	 * allocated and for a short time, the footprint is higher
>  	 */
>  	watermark = low_wmark_pages(zone) + (2UL << order);
> -	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> +	if (!zone_watermark_ok(zone, 0, watermark, 0, 0, 0))
>  		return COMPACT_SKIPPED;
> 
>  	/*
> @@ -745,7 +745,7 @@ unsigned long compaction_suitable(struct zone *zone, int order)
>  		return COMPACT_SKIPPED;
> 
>  	if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark,
> -	    0, 0))
> +	    0, 0, 0))
>  		return COMPACT_PARTIAL;
> 
>  	return COMPACT_CONTINUE;
> @@ -889,7 +889,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
>  		rc = max(status, rc);
> 
>  		/* If a normal allocation would succeed, stop compacting */
> -		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
> +		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0,
> +				      gfp_mask))
>  			break;
>  	}
> 
> @@ -920,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
> 
>  		if (cc->order > 0) {
>  			int ok = zone_watermark_ok(zone, cc->order,
> -						low_wmark_pages(zone), 0, 0);
> +						low_wmark_pages(zone), 0, 0, 0);
>  			if (ok && cc->order >= zone->compact_order_failed)
>  				zone->compact_order_failed = cc->order + 1;
>  			/* Currently async compaction is never deferred. */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 2166774..5912a8c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1423,7 +1423,7 @@ int split_free_page(struct page *page, bool check_wmark)
>  	if (check_wmark) {
>  		/* Obey watermarks as if the page was being allocated */
>  		watermark = low_wmark_pages(zone) + (1 << order);
> -		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> +		if (!zone_watermark_ok(zone, 0, watermark, 0, 0, 0))
>  			return 0;
>  	}
> 
> @@ -1628,12 +1628,13 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>   * of the allocation.
>   */
>  static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> -		int classzone_idx, int alloc_flags, long free_pages, long free_cma_pages)
> +		int classzone_idx, int alloc_flags, long free_pages,
> +		long free_cma_pages, gfp_t gfp_flags)
>  {
>  	/* free_pages my go negative - that's OK */
>  	long min = mark;
>  	long lowmem_reserve = z->lowmem_reserve[classzone_idx];
> -	int o;
> +	int mt, o;
> 
>  	free_pages -= (1 << order) - 1;
>  	if (alloc_flags & ALLOC_HIGH)
> @@ -1641,8 +1642,14 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
>  	if (alloc_flags & ALLOC_HARDER)
>  		min -= min / 4;
> 
> -	if (free_pages - free_cma_pages <= min + lowmem_reserve)
> -		return false;
> +	mt = allocflags_to_migratetype(gfp_flags);
> +	if (mt == MIGRATE_MOVABLE) {
> +		if (free_pages <= min + lowmem_reserve)
> +			return false;
> +	} else {
> +		if (free_pages - free_cma_pages <= min + lowmem_reserve)
> +			return false;
> +	}
>  	for (o = 0; o < order; o++) {
>  		/* At the next order, this order's pages become unavailable */
>  		free_pages -= z->free_area[o].nr_free << o;
> @@ -1671,11 +1678,12 @@ static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
>  #endif
> 
>  bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> -		      int classzone_idx, int alloc_flags)
> +		      int classzone_idx, int alloc_flags, gfp_t gfp_flags)
>  {
>  	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
>  					zone_page_state(z, NR_FREE_PAGES),
> -					zone_page_state(z, NR_FREE_CMA_PAGES));
> +					zone_page_state(z, NR_FREE_CMA_PAGES),
> +					gfp_flags);
>  }
> 
>  bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
> @@ -1696,7 +1704,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
>  	 */
>  	free_pages -= nr_zone_isolate_freepages(z);
>  	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
> -						free_pages, free_cma_pages);
> +						free_pages, free_cma_pages, 0);
>  }
> 
>  #ifdef CONFIG_NUMA
> @@ -1906,7 +1914,7 @@ zonelist_scan:
> 
>  		mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
>  		if (zone_watermark_ok(zone, order, mark,
> -				    classzone_idx, alloc_flags))
> +				    classzone_idx, alloc_flags, gfp_mask))
>  			goto try_this_zone;
> 
>  		if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
> @@ -1942,7 +1950,8 @@ zonelist_scan:
>  			default:
>  				/* did we reclaim enough */
>  				if (!zone_watermark_ok(zone, order, mark,
> -						classzone_idx, alloc_flags))
> +						classzone_idx, alloc_flags,
> +						gfp_mask))
>  					goto this_zone_full;
>  			}
>  		}
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 8d01243..4a10038b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2777,14 +2777,14 @@ out:
> 
>  		/* Confirm the zone is balanced for order-0 */
>  		if (!zone_watermark_ok(zone, 0,
> -					high_wmark_pages(zone), 0, 0)) {
> +					high_wmark_pages(zone), 0, 0, 0)) {
>  			order = sc.order = 0;
>  			goto loop_again;
>  		}
> 
>  		/* Check if the memory needs to be defragmented. */
>  		if (zone_watermark_ok(zone, order,
> -				low_wmark_pages(zone), *classzone_idx, 0))
> +				low_wmark_pages(zone), *classzone_idx, 0, 0))
>  			zones_need_compaction = 0;
> 
>  		/* If balanced, clear the congested flag */
> -- 
> 1.7.11.3

-- 
Kind regards,
Minchan Kim
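
P.S. For anyone skimming the thread: both the posted patch and the concept
diff above implement the same accounting rule - free pages sitting in CMA
pageblocks may only satisfy the watermark for requests that are allowed to
use CMA memory (movable allocations); for everything else they must be
discounted. Below is a tiny standalone C sketch of just that rule, with a
made-up watermark_ok() helper and numbers picked for illustration; it is
not the kernel's __zone_watermark_ok().

/*
 * Standalone sketch: CMA free pages only count toward the watermark
 * when the request is movable (i.e. may be placed in CMA pageblocks).
 */
#include <stdbool.h>
#include <stdio.h>

static bool watermark_ok(long free_pages, long free_cma_pages, long min,
			 long lowmem_reserve, bool movable)
{
	/* Non-movable requests cannot use CMA pages, so discount them. */
	if (!movable)
		free_pages -= free_cma_pages;

	return free_pages > min + lowmem_reserve;
}

int main(void)
{
	/* 1000 pages free, 800 of them in CMA areas, watermark of 300. */
	printf("movable:   %s\n",
	       watermark_ok(1000, 800, 300, 0, true) ? "ok" : "fail");
	printf("unmovable: %s\n",
	       watermark_ok(1000, 800, 300, 0, false) ? "ok" : "fail");
	return 0;
}

A movable request passes (1000 > 300) while an unmovable one fails
(1000 - 800 = 200 <= 300), which is exactly the asymmetry both patches
are trying to express, differing only in whether the decision is carried
in gfp_flags or precomputed into an ALLOC_CMA alloc_flags bit.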