Re: [PATCH v2 4/4] mm, compaction: direct freepage allocation for async direct compaction

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 31, 2016 at 10:50:36AM +0200, Vlastimil Babka wrote:
> The goal of direct compaction is to quickly make a high-order page available
> for the pending allocation. The free page scanner can add significant latency
> when searching for migration targets, although to succeed the compaction, the
> only important limit on the target free pages is that they must not come from
> the same order-aligned block as the migrated pages.

If migration fails, free pages will remain on the freelist, and they can
interfere with the success of further compaction: they do not come from the
previous order-aligned block, but they can come from the next order-aligned
block. Don't you need to free the remaining freelist after a migration
attempt fails?

Thanks.

> 
> This patch therefore makes direct async compaction allocate freepages directly
> from freelists. Pages that do come from the same block (which we cannot simply
> exclude from the freelist allocation) are put on separate list and released
> only after migration to allow them to merge.
> 
> In addition to reduced stall, another advantage is that we split larger free
> pages for migration targets only when smaller pages are depleted, while the
> free scanner can split pages up to (order - 1) as it encounters them. However,
> this approach likely sacrifices some of the long-term anti-fragmentation
> features of a thorough compaction, so we limit the direct allocation approach
> to direct async compaction.
> 
> For observational purposes, the patch introduces two new counters to
> /proc/vmstat. compact_free_direct_alloc counts how many pages were allocated
> directly without scanning, and compact_free_direct_miss counts the subset of
> these allocations that were from the wrong range and had to be held on the
> separate list.
> 
> Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
> ---
>  include/linux/vm_event_item.h |  1 +
>  mm/compaction.c               | 52 ++++++++++++++++++++++++++++++++++++++++++-
>  mm/internal.h                 |  5 +++++
>  mm/page_alloc.c               | 27 ++++++++++++++++++++++
>  mm/vmstat.c                   |  2 ++
>  5 files changed, 86 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index ec084321fe09..9ec29406a01e 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -51,6 +51,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>  #endif
>  #ifdef CONFIG_COMPACTION
>  		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
> +		COMPACTFREE_DIRECT_ALLOC, COMPACTFREE_DIRECT_MISS,
>  		COMPACTISOLATED,
>  		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
>  		KCOMPACTD_WAKE,
> diff --git a/mm/compaction.c b/mm/compaction.c
> index fe94d22d9144..215db281ecaf 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1083,6 +1083,41 @@ static void isolate_freepages(struct compact_control *cc)
>  	cc->free_pfn = isolate_start_pfn;
>  }
>  
> +static void isolate_freepages_direct(struct compact_control *cc)
> +{
> +	unsigned long nr_pages;
> +	unsigned long flags;
> +
> +	nr_pages = cc->nr_migratepages - cc->nr_freepages;
> +
> +	if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
> +		return;
> +
> +	while (nr_pages) {
> +		struct page *page;
> +		unsigned long pfn;
> +
> +		page = alloc_pages_zone(cc->zone, 0, MIGRATE_MOVABLE);
> +		if (!page)
> +			break;
> +		pfn = page_to_pfn(page);
> +
> +		count_compact_event(COMPACTFREE_DIRECT_ALLOC);
> +
> +		/* Is the free page in the block we are migrating from? */
> +		if (pfn >> cc->order ==	(cc->migrate_pfn - 1) >> cc->order) {
> +			list_add(&page->lru, &cc->freepages_held);
> +			count_compact_event(COMPACTFREE_DIRECT_MISS);
> +		} else {
> +			list_add(&page->lru, &cc->freepages);
> +			cc->nr_freepages++;
> +			nr_pages--;
> +		}
> +	}
> +
> +	spin_unlock_irqrestore(&cc->zone->lock, flags);
> +}
> +
>  /*
>   * This is a migrate-callback that "allocates" freepages by taking pages
>   * from the isolated freelists in the block we are migrating to.
> @@ -1099,7 +1134,12 @@ static struct page *compaction_alloc(struct page *migratepage,
>  	 * contention.
>  	 */
>  	if (list_empty(&cc->freepages)) {
> -		if (!cc->contended)
> +		if (cc->contended)
> +			return NULL;
> +
> +		if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC))
> +			isolate_freepages_direct(cc);
> +		else
>  			isolate_freepages(cc);
>  
>  		if (list_empty(&cc->freepages))
> @@ -1475,6 +1515,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
>  						(cc->mode == MIGRATE_ASYNC)) {
>  				cc->migrate_pfn = block_end_pfn(
>  						cc->migrate_pfn - 1, cc->order);
> +
> +				if (!list_empty(&cc->freepages_held))
> +					release_freepages(&cc->freepages_held);
> +
>  				/* Draining pcplists is useless in this case */
>  				cc->last_migrated_pfn = 0;
>  
> @@ -1495,6 +1539,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
>  				block_start_pfn(cc->migrate_pfn, cc->order);
>  
>  			if (cc->last_migrated_pfn < current_block_start) {
> +				if (!list_empty(&cc->freepages_held))
> +					release_freepages(&cc->freepages_held);
>  				cpu = get_cpu();
>  				lru_add_drain_cpu(cpu);
>  				drain_local_pages(zone);
> @@ -1525,6 +1571,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
>  		if (free_pfn > zone->compact_cached_free_pfn)
>  			zone->compact_cached_free_pfn = free_pfn;
>  	}
> +	if (!list_empty(&cc->freepages_held))
> +		release_freepages(&cc->freepages_held);
>  
>  	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
>  				cc->free_pfn, end_pfn, sync, ret);
> @@ -1553,6 +1601,7 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
>  	};
>  	INIT_LIST_HEAD(&cc.freepages);
>  	INIT_LIST_HEAD(&cc.migratepages);
> +	INIT_LIST_HEAD(&cc.freepages_held);
>  
>  	ret = compact_zone(zone, &cc);
>  
> @@ -1698,6 +1747,7 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
>  		cc->zone = zone;
>  		INIT_LIST_HEAD(&cc->freepages);
>  		INIT_LIST_HEAD(&cc->migratepages);
> +		INIT_LIST_HEAD(&cc->freepages_held);
>  
>  		/*
>  		 * When called via /proc/sys/vm/compact_memory
> diff --git a/mm/internal.h b/mm/internal.h
> index b79abb6721cf..a0c0286a9567 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -145,6 +145,8 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
>  }
>  
>  extern int __isolate_free_page(struct page *page, unsigned int order);
> +extern struct page * alloc_pages_zone(struct zone *zone, unsigned int order,
> +							int migratetype);
>  extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
>  					unsigned int order);
>  extern void prep_compound_page(struct page *page, unsigned int order);
> @@ -165,6 +167,9 @@ extern int user_min_free_kbytes;
>  struct compact_control {
>  	struct list_head freepages;	/* List of free pages to migrate to */
>  	struct list_head migratepages;	/* List of pages being migrated */
> +	struct list_head freepages_held;/* List of free pages from the block
> +					 * that's being migrated
> +					 */
>  	unsigned long nr_freepages;	/* Number of isolated free pages */
>  	unsigned long nr_migratepages;	/* Number of pages to migrate */
>  	unsigned long free_pfn;		/* isolate_freepages search base */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 59de90d5d3a3..3ee83fe02274 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2343,6 +2343,33 @@ int split_free_page(struct page *page)
>  }
>  
>  /*
> + * Like split_free_page, but given the zone, it will grab a free page from
> + * the freelists.
> + */
> +struct page *
> +alloc_pages_zone(struct zone *zone, unsigned int order, int migratetype)
> +{
> +	struct page *page;
> +	unsigned long watermark;
> +
> +	watermark = low_wmark_pages(zone) + (1 << order);
> +	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
> +		return NULL;
> +
> +	page = __rmqueue(zone, order, migratetype);
> +	if (!page)
> +		return NULL;
> +
> +	__mod_zone_freepage_state(zone, -(1 << order),
> +					  get_pcppage_migratetype(page));
> +
> +	set_page_owner(page, order, __GFP_MOVABLE);
> +	set_page_refcounted(page);
> +
> +	return page;
> +}
> +
> +/*
>   * Allocate a page from the given zone. Use pcplists for order-0 allocations.
>   */
>  static inline
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 5e4300482897..9e07d11afa0d 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -822,6 +822,8 @@ const char * const vmstat_text[] = {
>  #ifdef CONFIG_COMPACTION
>  	"compact_migrate_scanned",
>  	"compact_free_scanned",
> +	"compact_free_direct_alloc",
> +	"compact_free_direct_miss",
>  	"compact_isolated",
>  	"compact_stall",
>  	"compact_fail",
> -- 
> 2.7.3
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]