On Wed, Mar 24, 2010 at 09:06:51PM +0900, Minchan Kim wrote: > On Wed, Mar 24, 2010 at 8:59 PM, Minchan Kim <minchan.kim@xxxxxxxxx> wrote: > > On Wed, Mar 24, 2010 at 8:11 PM, Mel Gorman <mel@xxxxxxxxx> wrote: > >> On Wed, Mar 24, 2010 at 08:10:40AM +0900, Minchan Kim wrote: > >>> Hi, Mel. > >>> > >>> On Tue, Mar 23, 2010 at 9:25 PM, Mel Gorman <mel@xxxxxxxxx> wrote: > >>> > Ordinarily when a high-order allocation fails, direct reclaim is entered to > >>> > free pages to satisfy the allocation. With this patch, it is determined if > >>> > an allocation failed due to external fragmentation instead of low memory > >>> > and if so, the calling process will compact until a suitable page is > >>> > freed. Compaction by moving pages in memory is considerably cheaper than > >>> > paging out to disk and works where there are locked pages or no swap. If > >>> > compaction fails to free a page of a suitable size, then reclaim will > >>> > still occur. > >>> > > >>> > Direct compaction returns as soon as possible. As each block is compacted, > >>> > it is checked if a suitable page has been freed and if so, it returns. > >>> > > >>> > Signed-off-by: Mel Gorman <mel@xxxxxxxxx> > >>> > Acked-by: Rik van Riel <riel@xxxxxxxxxx> > >>> > --- > >>> > include/linux/compaction.h | 16 +++++- > >>> > include/linux/vmstat.h | 1 + > >>> > mm/compaction.c | 118 ++++++++++++++++++++++++++++++++++++++++++++ > >>> > mm/page_alloc.c | 26 ++++++++++ > >>> > mm/vmstat.c | 15 +++++- > >>> > 5 files changed, 172 insertions(+), 4 deletions(-) > >>> > > >>> > diff --git a/include/linux/compaction.h b/include/linux/compaction.h > >>> > index c94890b..b851428 100644 > >>> > --- a/include/linux/compaction.h > >>> > +++ b/include/linux/compaction.h > >>> > @@ -1,14 +1,26 @@ > >>> > #ifndef _LINUX_COMPACTION_H > >>> > #define _LINUX_COMPACTION_H > >>> > > >>> > -/* Return values for compact_zone() */ > >>> > +/* Return values for compact_zone() and try_to_compact_pages() */ > >>> > #define COMPACT_INCOMPLETE 0 > >>> > -#define COMPACT_COMPLETE 1 > >>> > +#define COMPACT_PARTIAL 1 > >>> > +#define COMPACT_COMPLETE 2 > >>> > > >>> > #ifdef CONFIG_COMPACTION > >>> > extern int sysctl_compact_memory; > >>> > extern int sysctl_compaction_handler(struct ctl_table *table, int write, > >>> > void __user *buffer, size_t *length, loff_t *ppos); > >>> > + > >>> > +extern int fragmentation_index(struct zone *zone, unsigned int order); > >>> > +extern unsigned long try_to_compact_pages(struct zonelist *zonelist, > >>> > + int order, gfp_t gfp_mask, nodemask_t *mask); > >>> > +#else > >>> > +static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, > >>> > + int order, gfp_t gfp_mask, nodemask_t *nodemask) > >>> > +{ > >>> > + return COMPACT_INCOMPLETE; > >>> > +} > >>> > + > >>> > #endif /* CONFIG_COMPACTION */ > >>> > > >>> > #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) > >>> > diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h > >>> > index 56e4b44..b4b4d34 100644 > >>> > --- a/include/linux/vmstat.h > >>> > +++ b/include/linux/vmstat.h > >>> > @@ -44,6 +44,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > >>> > KSWAPD_SKIP_CONGESTION_WAIT, > >>> > PAGEOUTRUN, ALLOCSTALL, PGROTATED, > >>> > COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, > >>> > + COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, > >>> > #ifdef CONFIG_HUGETLB_PAGE > >>> > HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, > >>> > #endif > >>> > diff --git a/mm/compaction.c b/mm/compaction.c > >>> > index 8df6e3d..6688700 100644 > >>> > --- a/mm/compaction.c > >>> > +++ b/mm/compaction.c > >>> > @@ -34,6 +34,8 @@ struct compact_control { > >>> > unsigned long nr_anon; > >>> > unsigned long nr_file; > >>> > > >>> > + unsigned int order; /* order a direct compactor needs */ > >>> > + int migratetype; /* MOVABLE, RECLAIMABLE etc */ > >>> > struct zone *zone; > >>> > }; > >>> > > >>> > @@ -301,10 +303,31 @@ static void update_nr_listpages(struct compact_control *cc) > >>> > static inline int compact_finished(struct zone *zone, > >>> > struct compact_control *cc) > >>> > { > >>> > + unsigned int order; > >>> > + unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order); > >>> > + > >>> > /* Compaction run completes if the migrate and free scanner meet */ > >>> > if (cc->free_pfn <= cc->migrate_pfn) > >>> > return COMPACT_COMPLETE; > >>> > > >>> > + /* Compaction run is not finished if the watermark is not met */ > >>> > + if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) > >>> > + return COMPACT_INCOMPLETE; > >>> > + > >>> > + if (cc->order == -1) > >>> > + return COMPACT_INCOMPLETE; > >>> > + > >>> > + /* Direct compactor: Is a suitable page free? */ > >>> > + for (order = cc->order; order < MAX_ORDER; order++) { > >>> > + /* Job done if page is free of the right migratetype */ > >>> > + if (!list_empty(&zone->free_area[order].free_list[cc->migratetype])) > >>> > + return COMPACT_PARTIAL; > >>> > + > >>> > + /* Job done if allocation would set block type */ > >>> > + if (order >= pageblock_order && zone->free_area[order].nr_free) > >>> > + return COMPACT_PARTIAL; > >>> > + } > >>> > + > >>> > return COMPACT_INCOMPLETE; > >>> > } > >>> > > >>> > @@ -348,6 +371,101 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) > >>> > return ret; > >>> > } > >>> > > >>> > +static inline unsigned long compact_zone_order(struct zone *zone, > >>> > + int order, gfp_t gfp_mask) > >>> > +{ > >>> > + struct compact_control cc = { > >>> > + .nr_freepages = 0, > >>> > + .nr_migratepages = 0, > >>> > + .order = order, > >>> > + .migratetype = allocflags_to_migratetype(gfp_mask), > >>> > + .zone = zone, > >>> > + }; > >>> > + INIT_LIST_HEAD(&cc.freepages); > >>> > + INIT_LIST_HEAD(&cc.migratepages); > >>> > + > >>> > + return compact_zone(zone, &cc); > >>> > +} > >>> > + > >>> > +/** > >>> > + * try_to_compact_pages - Direct compact to satisfy a high-order allocation > >>> > + * @zonelist: The zonelist used for the current allocation > >>> > + * @order: The order of the current allocation > >>> > + * @gfp_mask: The GFP mask of the current allocation > >>> > + * @nodemask: The allowed nodes to allocate from > >>> > + * > >>> > + * This is the main entry point for direct page compaction. > >>> > + */ > >>> > +unsigned long try_to_compact_pages(struct zonelist *zonelist, > >>> > + int order, gfp_t gfp_mask, nodemask_t *nodemask) > >>> > +{ > >>> > + enum zone_type high_zoneidx = gfp_zone(gfp_mask); > >>> > + int may_enter_fs = gfp_mask & __GFP_FS; > >>> > + int may_perform_io = gfp_mask & __GFP_IO; > >>> > + unsigned long watermark; > >>> > + struct zoneref *z; > >>> > + struct zone *zone; > >>> > + int rc = COMPACT_INCOMPLETE; > >>> > + > >>> > + /* Check whether it is worth even starting compaction */ > >>> > + if (order == 0 || !may_enter_fs || !may_perform_io) > >>> > + return rc; > >>> > + > >>> > + /* > >>> > + * We will not stall if the necessary conditions are not met for > >>> > + * migration but direct reclaim seems to account stalls similarly > >>> > + */ > > Then, Let's remove this comment. > Yes, it hinders more than it helps in this case. It's deleted now. -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxxx For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>