On Friday 14 September 2012 06:13:34 Minchan Kim wrote:
> On Tue, Sep 04, 2012 at 03:26:25PM +0200, Bartlomiej Zolnierkiewicz wrote:
> > Pass GFP flags to [__]zone_watermark_ok() and use them to account
> > free CMA pages only when necessary (there is no need to check
> > watermark against only non-CMA free pages for movable allocations).
>
> I want to make it zero-overhead in case of !CONFIG_CMA.
> We can reduce the number of zone_watermark_ok's argument and in case of !CONFIG_CMA,
> overhead would be zero.
>
> How about this?
> (Below is what I want to show the *concept*, NOT completed, NOT compile tested)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 009ac28..61c592a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1514,6 +1514,8 @@ failed:
>  #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
>  #define ALLOC_CPUSET 0x40 /* check for correct cpuset */
>
> +#define ALLOC_CMA 0x80
> +
>  #ifdef CONFIG_FAIL_PAGE_ALLOC
>
>  static struct {
> @@ -1608,7 +1610,10 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
>                  min -= min / 2;
>          if (alloc_flags & ALLOC_HARDER)
>                  min -= min / 4;
> -
> +#ifdef CONFIG_CMA
> +        if (alloc_flags & ALLOC_CMA)

This should be (!(alloc_flags & ALLOC_CMA)) because we want to decrease
free pages when the flag is not set (== unmovable allocation).

> +                free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
> +#endif
>          if (free_pages <= min + lowmem_reserve)
>                  return false;
>          for (o = 0; o < order; o++) {
> @@ -2303,7 +2308,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
>                                  unlikely(test_thread_flag(TIF_MEMDIE))))
>                          alloc_flags |= ALLOC_NO_WATERMARKS;
>          }
> -
> +#ifdef CONFIG_CMA
> +        if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> +                alloc_flags |= ALLOC_CMA;
> +#endif
>          return alloc_flags;
>  }
>
> @@ -2533,6 +2541,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
>          struct page *page = NULL;
>          int migratetype = allocflags_to_migratetype(gfp_mask);
>          unsigned int cpuset_mems_cookie;
> +        int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
>
>          gfp_mask &= gfp_allowed_mask;
>
> @@ -2561,9 +2570,13 @@ retry_cpuset:
>          if (!preferred_zone)
>                  goto out;
>
> +#ifdef CONFIG_CMA
> +        if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> +                alloc_flags |= ALLOC_CMA;
> +#endif
>          /* First allocation attempt */
>          page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
> -                        zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
> +                        zonelist, high_zoneidx, alloc_flags,
>                          preferred_zone, migratetype);
>          if (unlikely(!page))
>                  page = __alloc_pages_slowpath(gfp_mask, order,

Otherwise the change to ALLOC_CMA looks good and I'll do it in the next
revision of the patchset.

Thanks for review & useful ideas!
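Just to make sure we end up with the same code, here is roughly how I would
fold your idea (with the check inverted) into the next revision. Same caveat
as your version: only a sketch, not compile tested.

#ifdef CONFIG_CMA
        /*
         * ALLOC_CMA is only set for movable allocations, which are the only
         * ones allowed to use free CMA pages, so for everything else those
         * pages must not be counted against the watermark.
         */
        if (!(alloc_flags & ALLOC_CMA))
                free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
        if (free_pages <= min + lowmem_reserve)
                return false;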
Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung Poland R&D Center

> > Cc: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx>
> > Cc: Michal Nazarewicz <mina86@xxxxxxxxxx>
> > Cc: Minchan Kim <minchan@xxxxxxxxxx>
> > Cc: Mel Gorman <mgorman@xxxxxxx>
> > Cc: Hugh Dickins <hughd@xxxxxxxxxx>
> > Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@xxxxxxxxxxx>
> > Signed-off-by: Kyungmin Park <kyungmin.park@xxxxxxxxxxx>
> > ---
> >  include/linux/mmzone.h |  2 +-
> >  mm/compaction.c        | 11 ++++++-----
> >  mm/page_alloc.c        | 29 +++++++++++++++++++----------
> >  mm/vmscan.c            |  4 ++--
> >  4 files changed, 28 insertions(+), 18 deletions(-)
> >
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 904889d..308bb91 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -725,7 +725,7 @@ extern struct mutex zonelists_mutex;
> >  void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
> >  void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
> >  bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> > -                int classzone_idx, int alloc_flags);
> > +                int classzone_idx, int alloc_flags, gfp_t gfp_flags);
> >  bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
> >                  int classzone_idx, int alloc_flags);
> >  enum memmap_context {
> > diff --git a/mm/compaction.c b/mm/compaction.c
> > index 4b902aa..080175a 100644
> > --- a/mm/compaction.c
> > +++ b/mm/compaction.c
> > @@ -684,7 +684,7 @@ static int compact_finished(struct zone *zone,
> >                  watermark = low_wmark_pages(zone);
> >                  watermark += (1 << cc->order);
> >
> > -                if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
> > +                if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0, 0))
> >                          return COMPACT_CONTINUE;
> >
> >                  /* Direct compactor: Is a suitable page free? */
> > @@ -726,7 +726,7 @@ unsigned long compaction_suitable(struct zone *zone, int order)
> >           * allocated and for a short time, the footprint is higher
> >           */
> >          watermark = low_wmark_pages(zone) + (2UL << order);
> > -        if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> > +        if (!zone_watermark_ok(zone, 0, watermark, 0, 0, 0))
> >                  return COMPACT_SKIPPED;
> >
> >          /*
> > @@ -745,7 +745,7 @@ unsigned long compaction_suitable(struct zone *zone, int order)
> >                  return COMPACT_SKIPPED;
> >
> >          if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark,
> > -            0, 0))
> > +            0, 0, 0))
> >                  return COMPACT_PARTIAL;
> >
> >          return COMPACT_CONTINUE;
> > @@ -889,7 +889,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
> >                  rc = max(status, rc);
> >
> >                  /* If a normal allocation would succeed, stop compacting */
> > -                if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
> > +                if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0,
> > +                                gfp_mask))
> >                          break;
> >          }
> >
> > @@ -920,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
> >
> >                  if (cc->order > 0) {
> >                          int ok = zone_watermark_ok(zone, cc->order,
> > -                                        low_wmark_pages(zone), 0, 0);
> > +                                        low_wmark_pages(zone), 0, 0, 0);
> >                          if (ok && cc->order >= zone->compact_order_failed)
> >                                  zone->compact_order_failed = cc->order + 1;
> >                          /* Currently async compaction is never deferred. */
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 2166774..5912a8c 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -1423,7 +1423,7 @@ int split_free_page(struct page *page, bool check_wmark)
> >          if (check_wmark) {
> >                  /* Obey watermarks as if the page was being allocated */
> >                  watermark = low_wmark_pages(zone) + (1 << order);
> > -                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> > +                if (!zone_watermark_ok(zone, 0, watermark, 0, 0, 0))
> >                          return 0;
> >          }
> >
> > @@ -1628,12 +1628,13 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
> >   * of the allocation.
> >   */
> >  static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> > -                int classzone_idx, int alloc_flags, long free_pages, long free_cma_pages)
> > +                int classzone_idx, int alloc_flags, long free_pages,
> > +                long free_cma_pages, gfp_t gfp_flags)
> >  {
> >          /* free_pages my go negative - that's OK */
> >          long min = mark;
> >          long lowmem_reserve = z->lowmem_reserve[classzone_idx];
> > -        int o;
> > +        int mt, o;
> >
> >          free_pages -= (1 << order) - 1;
> >          if (alloc_flags & ALLOC_HIGH)
> > @@ -1641,8 +1642,14 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> >          if (alloc_flags & ALLOC_HARDER)
> >                  min -= min / 4;
> >
> > -        if (free_pages - free_cma_pages <= min + lowmem_reserve)
> > -                return false;
> > +        mt = allocflags_to_migratetype(gfp_flags);
> > +        if (mt == MIGRATE_MOVABLE) {
> > +                if (free_pages <= min + lowmem_reserve)
> > +                        return false;
> > +        } else {
> > +                if (free_pages - free_cma_pages <= min + lowmem_reserve)
> > +                        return false;
> > +        }
> >          for (o = 0; o < order; o++) {
> >                  /* At the next order, this order's pages become unavailable */
> >                  free_pages -= z->free_area[o].nr_free << o;
> > @@ -1671,11 +1678,12 @@ static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
> >  #endif
> >
> >  bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> > -                int classzone_idx, int alloc_flags)
> > +                int classzone_idx, int alloc_flags, gfp_t gfp_flags)
> >  {
> >          return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
> >                                  zone_page_state(z, NR_FREE_PAGES),
> > -                                zone_page_state(z, NR_FREE_CMA_PAGES));
> > +                                zone_page_state(z, NR_FREE_CMA_PAGES),
> > +                                gfp_flags);
> >  }
> >
> >  bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
> > @@ -1696,7 +1704,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
> >           */
> >          free_pages -= nr_zone_isolate_freepages(z);
> >          return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
> > -                        free_pages, free_cma_pages);
> > +                        free_pages, free_cma_pages, 0);
> >  }
> >
> >  #ifdef CONFIG_NUMA
> > @@ -1906,7 +1914,7 @@ zonelist_scan:
> >
> >                  mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
> >                  if (zone_watermark_ok(zone, order, mark,
> > -                                classzone_idx, alloc_flags))
> > +                                classzone_idx, alloc_flags, gfp_mask))
> >                          goto try_this_zone;
> >
> >                  if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
> > @@ -1942,7 +1950,8 @@ zonelist_scan:
> >                  default:
> >                          /* did we reclaim enough */
> >                          if (!zone_watermark_ok(zone, order, mark,
> > -                                        classzone_idx, alloc_flags))
> > +                                        classzone_idx, alloc_flags,
> > +                                        gfp_mask))
> >                                  goto this_zone_full;
> >                  }
> >          }
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index 8d01243..4a10038b 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -2777,14 +2777,14 @@ out:
> >
> >          /* Confirm the zone is balanced for order-0 */
> >          if (!zone_watermark_ok(zone, 0,
> > -                        high_wmark_pages(zone), 0, 0)) {
> > +                        high_wmark_pages(zone), 0, 0, 0)) {
> >                  order = sc.order = 0;
> >                  goto loop_again;
> >          }
> >
> >          /* Check if the memory needs to be defragmented. */
> >          if (zone_watermark_ok(zone, order,
> > -                        low_wmark_pages(zone), *classzone_idx, 0))
> > +                        low_wmark_pages(zone), *classzone_idx, 0, 0))
> >                  zones_need_compaction = 0;
> >
> >          /* If balanced, clear the congested flag */
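P.S. For anyone skimming the thread: the accounting rule both versions aim
for is simply that non-movable allocations must not treat free CMA pages as
usable when checking the watermark. A tiny standalone illustration (the
numbers and names are made up, nothing here comes from the kernel sources):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the watermark decision discussed above. */
static bool watermark_ok(long free_pages, long free_cma_pages,
                         long min, bool movable)
{
        /* Only movable allocations may be satisfied from CMA pageblocks. */
        if (!movable)
                free_pages -= free_cma_pages;
        return free_pages > min;
}

int main(void)
{
        /* 1000 pages free, 800 of them in CMA, watermark of 300. */
        printf("movable:     %d\n", watermark_ok(1000, 800, 300, true));  /* 1 */
        printf("non-movable: %d\n", watermark_ok(1000, 800, 300, false)); /* 0 */
        return 0;
}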