> On Thu, Oct 06, 2011 at 03:54:42PM +0200, Marek Szyprowski wrote: >> This commit introduces alloc_contig_freed_pages() function >> which allocates (ie. removes from buddy system) free pages >> in range. Caller has to guarantee that all pages in range >> are in buddy system. On Tue, 18 Oct 2011 05:21:09 -0700, Mel Gorman <mel@xxxxxxxxx> wrote: > Straight away, I'm wondering why you didn't use > mm/compaction.c#isolate_freepages() Does the below look like a step in the right direction? It basically moves isolate_freepages_block() to page_alloc.c (changing it name to isolate_freepages_range()) and changes it so that depending on arguments it treats holes (either invalid PFN or non-free page) as errors so that CMA can use it. It also accepts a range rather then just assuming a single pageblock thus the change moves range calculation in compaction.c from isolate_freepages_block() up to isolate_freepages(). The change also modifies spilt_free_page() so that it does not try to change pageblock's migrate type if current migrate type is ISOLATE or CMA. --- include/linux/mm.h | 1 - include/linux/page-isolation.h | 4 +- mm/compaction.c | 73 +++-------------------- mm/internal.h | 5 ++ mm/page_alloc.c | 128 +++++++++++++++++++++++++--------------- 5 files changed, 95 insertions(+), 116 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index fd599f4..98c99c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -435,7 +435,6 @@ void put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); -int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 003c52f..6becc74 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -48,10 +48,8 @@ static inline void unset_migratetype_isolate(struct page *page) } /* The below functions must be run on a range from a single zone. */ -extern unsigned long alloc_contig_freed_pages(unsigned long start, - unsigned long end, gfp_t flag); extern int alloc_contig_range(unsigned long start, unsigned long end, - gfp_t flags, unsigned migratetype); + unsigned migratetype); extern void free_contig_pages(unsigned long pfn, unsigned nr_pages); /* diff --git a/mm/compaction.c b/mm/compaction.c index 9e5cc59..685a19e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -58,77 +58,15 @@ static unsigned long release_freepages(struct list_head *freelist) return count; } -/* Isolate free pages onto a private freelist. Must hold zone->lock */ -static unsigned long isolate_freepages_block(struct zone *zone, - unsigned long blockpfn, - struct list_head *freelist) -{ - unsigned long zone_end_pfn, end_pfn; - int nr_scanned = 0, total_isolated = 0; - struct page *cursor; - - /* Get the last PFN we should scan for free pages at */ - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); - - /* Find the first usable PFN in the block to initialse page cursor */ - for (; blockpfn < end_pfn; blockpfn++) { - if (pfn_valid_within(blockpfn)) - break; - } - cursor = pfn_to_page(blockpfn); - - /* Isolate free pages. This assumes the block is valid */ - for (; blockpfn < end_pfn; blockpfn++, cursor++) { - int isolated, i; - struct page *page = cursor; - - if (!pfn_valid_within(blockpfn)) - continue; - nr_scanned++; - - if (!PageBuddy(page)) - continue; - - /* Found a free page, break it into order-0 pages */ - isolated = split_free_page(page); - total_isolated += isolated; - for (i = 0; i < isolated; i++) { - list_add(&page->lru, freelist); - page++; - } - - /* If a page was split, advance to the end of it */ - if (isolated) { - blockpfn += isolated - 1; - cursor += isolated - 1; - } - } - - trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); - return total_isolated; -} - /* Returns true if the page is within a block suitable for migration to */ static bool suitable_migration_target(struct page *page) { - int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) return false; - /* Keep MIGRATE_CMA alone as well. */ - /* - * XXX Revisit. We currently cannot let compaction touch CMA - * pages since compaction insists on changing their migration - * type to MIGRATE_MOVABLE (see split_free_page() called from - * isolate_freepages_block() above). - */ - if (is_migrate_cma(migratetype)) - return false; - /* If the page is a large free page, then allow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) return true; @@ -149,7 +87,7 @@ static void isolate_freepages(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned long high_pfn, low_pfn, pfn; + unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; unsigned long flags; int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; @@ -169,6 +107,8 @@ static void isolate_freepages(struct zone *zone, */ high_pfn = min(low_pfn, pfn); + zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. We stop searching if the migrate @@ -176,7 +116,7 @@ static void isolate_freepages(struct zone *zone, */ for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { - unsigned long isolated; + unsigned isolated, scanned; if (!pfn_valid(pfn)) continue; @@ -205,7 +145,10 @@ static void isolate_freepages(struct zone *zone, isolated = 0; spin_lock_irqsave(&zone->lock, flags); if (suitable_migration_target(page)) { - isolated = isolate_freepages_block(zone, pfn, freelist); + end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); + isolated = isolate_freepages_range(zone, pfn, + end_pfn, freelist, &scanned); + trace_mm_compaction_isolate_freepages(scanned, isolated); nr_freepages += isolated; } spin_unlock_irqrestore(&zone->lock, flags); diff --git a/mm/internal.h b/mm/internal.h index d071d380..4a9bb3f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -263,3 +263,8 @@ extern u64 hwpoison_filter_flags_mask; extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; + +unsigned isolate_freepages_range(struct zone *zone, + unsigned long start, unsigned long end, + struct list_head *freelist, + unsigned *scannedp); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df69706..adf3f34 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1300,10 +1300,11 @@ void split_page(struct page *page, unsigned int order) * Note: this is probably too low level an operation for use in drivers. * Please consult with lkml before using this in your driver. */ -int split_free_page(struct page *page) +static unsigned split_free_page(struct page *page) { unsigned int order; unsigned long watermark; + struct page *endpage; struct zone *zone; BUG_ON(!PageBuddy(page)); @@ -1326,14 +1327,18 @@ int split_free_page(struct page *page) set_page_refcounted(page); split_page(page, order); - if (order >= pageblock_order - 1) { - struct page *endpage = page + (1 << order) - 1; - for (; page < endpage; page += pageblock_nr_pages) - if (!is_pageblock_cma(page)) - set_pageblock_migratetype(page, - MIGRATE_MOVABLE); + if (order < pageblock_order - 1) + goto done; + + endpage = page + (1 << order) - 1; + for (; page < endpage; page += pageblock_nr_pages) { + int mt = get_pageblock_migratetype(page); + /* Don't change CMA nor ISOLATE */ + if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) + set_pageblock_migratetype(page, MIGRATE_MOVABLE); } +done: return 1 << order; } @@ -5723,57 +5728,76 @@ out: spin_unlock_irqrestore(&zone->lock, flags); } -unsigned long alloc_contig_freed_pages(unsigned long start, unsigned long end, - gfp_t flag) +/** + * isolate_freepages_range() - isolate free pages, must hold zone->lock. + * @zone: Zone pages are in. + * @start: The first PFN to start isolating. + * @end: The one-past-last PFN. + * @freelist: A list to save isolated pages to. + * @scannedp: Optional pointer where to save number of scanned pages. + * + * If @freelist is not provided, holes in range (either non-free pages + * or invalid PFN) are considered an error and function undos its + * actions and returns zero. + * + * If @freelist is provided, function will simply skip non-free and + * missing pages and put only the ones isolated on the list. It will + * also call trace_mm_compaction_isolate_freepages() at the end. + * + * Returns number of isolated pages. This may be more then end-start + * if end fell in a middle of a free page. + */ +unsigned isolate_freepages_range(struct zone *zone, + unsigned long start, unsigned long end, + struct list_head *freelist, unsigned *scannedp) { - unsigned long pfn = start, count; + unsigned nr_scanned = 0, total_isolated = 0; + unsigned long pfn = start; struct page *page; - struct zone *zone; - int order; VM_BUG_ON(!pfn_valid(start)); - page = pfn_to_page(start); - zone = page_zone(page); - spin_lock_irq(&zone->lock); + /* Isolate free pages. This assumes the block is valid */ + page = pfn_to_page(pfn); + while (pfn < end) { + unsigned isolated = 1; - for (;;) { - VM_BUG_ON(!page_count(page) || !PageBuddy(page) || - page_zone(page) != zone); + VM_BUG_ON(page_zone(page) != zone); - list_del(&page->lru); - order = page_order(page); - count = 1UL << order; - zone->free_area[order].nr_free--; - rmv_page_order(page); - __mod_zone_page_state(zone, NR_FREE_PAGES, -(long)count); + if (!pfn_valid_within(blockpfn)) + goto skip; + ++nr_scanned; - pfn += count; - if (pfn >= end) - break; - VM_BUG_ON(!pfn_valid(pfn)); - - if (zone_pfn_same_memmap(pfn - count, pfn)) - page += count; - else - page = pfn_to_page(pfn); - } + if (!PageBuddy(page)) { +skip: + if (freelist) + goto next; + for (; start < pfn; ++start) + __free_page(pfn_to_page(pfn)); + return 0; + } - spin_unlock_irq(&zone->lock); + /* Found a free page, break it into order-0 pages */ + isolated = split_free_page(page); + total_isolated += isolated; + if (freelist) { + struct page *p = page; + unsigned i = isolated; + for (; i--; ++page) + list_add(&p->lru, freelist); + } - /* After this, pages in the range can be freed one be one */ - count = pfn - start; - pfn = start; - for (page = pfn_to_page(pfn); count; --count) { - prep_new_page(page, 0, flag); - ++pfn; - if (likely(zone_pfn_same_memmap(pfn - 1, pfn))) - ++page; +next: /* Advance to the next page */ + pfn += isolated; + if (zone_pfn_same_memmap(pfn - isolated, pfn)) + page += isolated; else page = pfn_to_page(pfn); } - return pfn; + if (scannedp) + *scannedp = nr_scanned; + return total_isolated; } static unsigned long pfn_to_maxpage(unsigned long pfn) @@ -5837,7 +5861,6 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate * @end: one-past-the-last PFN to allocate - * @flags: flags passed to alloc_contig_freed_pages(). * @migratetype: migratetype of the underlaying pageblocks (either * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks * in range must have the same migratetype and it must @@ -5853,9 +5876,10 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) * need to be freed with free_contig_pages(). */ int alloc_contig_range(unsigned long start, unsigned long end, - gfp_t flags, unsigned migratetype) + unsigned migratetype) { unsigned long outer_start, outer_end; + struct zone *zone; int ret; /* @@ -5910,7 +5934,17 @@ int alloc_contig_range(unsigned long start, unsigned long end, return -EINVAL; outer_start = start & (~0UL << ret); - outer_end = alloc_contig_freed_pages(outer_start, end, flags); + + zone = page_zone(pfn_to_page(outer_start)); + spin_lock_irq(&zone->lock); + outer_end = isolate_freepages_range(zone, outer_start, end, NULL, NULL); + spin_unlock_irq(&zone->lock); + + if (!outer_end) { + ret = -EBUSY; + goto done; + } + outer_end += outer_start; /* Free head and tail (if any) */ if (start != outer_start) -- 1.7.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-media" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html