find_alloc_contig_pages() is a new interface that attempts to locate
and allocate a contiguous range of pages.  It is provided as a more
convenient interface than alloc_contig_range() which is currently
used by CMA and gigantic huge pages.

When attempting to allocate a range of pages, migration is employed
if possible.  There is no guarantee that the routine will succeed.
So, the user must be prepared for failure and have a fallback plan.

Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
---
 include/linux/gfp.h |  12 +++++
 mm/page_alloc.c     | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 158 insertions(+), 2 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 86a0d06463ab..b0d11777d487 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -573,6 +573,18 @@ static inline bool pm_suspended_storage(void)
 extern int alloc_contig_range(unsigned long start, unsigned long end,
 			      unsigned migratetype, gfp_t gfp_mask);
 extern void free_contig_range(unsigned long pfn, unsigned long nr_pages);
+extern struct page *find_alloc_contig_pages(unsigned long nr_pages, gfp_t gfp,
+						int nid, nodemask_t *nodemask);
+extern void free_contig_pages(struct page *page, unsigned long nr_pages);
+#else
+static inline struct page *find_alloc_contig_pages(unsigned long nr_pages,
+				gfp_t gfp, int nid, nodemask_t *nodemask)
+{
+	return NULL;
+}
+static inline void free_contig_pages(struct page *page, unsigned long nr_pages)
+{
+}
 #endif
 
 #ifdef CONFIG_CMA
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb1a5e0be6ee..d0a2d0da9eae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -67,6 +67,7 @@
 #include <linux/ftrace.h>
 #include <linux/lockdep.h>
 #include <linux/nmi.h>
+#include <linux/mmzone.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -7913,8 +7914,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	/* Make sure the range is really isolated. */
 	if (test_pages_isolated(outer_start, end, false)) {
-		pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
-			__func__, outer_start, end);
+#ifdef CONFIG_CMA
+		/* Only print messages for CMA allocations */
+		if (migratetype == MIGRATE_CMA)
+			pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
+					__func__, outer_start, end);
+#endif
 		ret = -EBUSY;
 		goto done;
 	}
@@ -7950,6 +7955,145 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 	}
 	WARN(count != 0, "%ld pages are still in use!\n", count);
 }
+
+/*
+ * Only check for obvious pfn/pages which can not be used/migrated.  The
+ * migration code will do the final check.  Under stress, this minimal set
+ * has been observed to provide the best results.  The checks can be expanded
+ * if needed.
+ */
+static bool contig_pfn_range_valid(struct zone *z, unsigned long start_pfn,
+					unsigned long nr_pages)
+{
+	unsigned long i, end_pfn = start_pfn + nr_pages;
+	struct page *page;
+
+	for (i = start_pfn; i < end_pfn; i++) {
+		if (!pfn_valid(i))
+			return false;
+
+		/* pfn_to_online_page() returns NULL for offline sections */
+		page = pfn_to_online_page(i);
+		if (!page || page_zone(page) != z)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Search for and attempt to allocate contiguous allocations greater than
+ * MAX_ORDER.
+ */
+static struct page *__alloc_contig_pages_nodemask(gfp_t gfp,
+						unsigned long order,
+						int nid, nodemask_t *nodemask)
+{
+	unsigned long nr_pages, pfn, flags;
+	struct page *ret_page = NULL;
+	struct zonelist *zonelist;
+	struct zoneref *z;
+	struct zone *zone;
+	int rc;
+
+	/* 1UL: a plain int shift overflows for order >= 31 */
+	nr_pages = 1UL << order;
+	zonelist = node_zonelist(nid, gfp);
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp),
+					nodemask) {
+		pgdat_resize_lock(zone->zone_pgdat, &flags);
+		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
+		while (zone_spans_pfn(zone, pfn + nr_pages - 1)) {
+			if (contig_pfn_range_valid(zone, pfn, nr_pages)) {
+				struct page *page = pfn_to_online_page(pfn);
+				unsigned int migratetype;
+
+				/*
+				 * All pageblocks in range must be of same
+				 * migrate type.
+				 */
+				migratetype = get_pageblock_migratetype(page);
+				pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+				rc = alloc_contig_range(pfn, pfn + nr_pages,
+						migratetype, gfp);
+				if (!rc) {
+					ret_page = pfn_to_page(pfn);
+					return ret_page;
+				}
+				pgdat_resize_lock(zone->zone_pgdat, &flags);
+			}
+			pfn += nr_pages;
+		}
+		pgdat_resize_unlock(zone->zone_pgdat, &flags);
+	}
+
+	return ret_page;
+}
+
+/**
+ * find_alloc_contig_pages() -- attempt to find and allocate a contiguous
+ *				range of pages
+ * @nr_pages:	number of pages to find/allocate
+ * @gfp:	gfp mask used to limit search as well as during compaction
+ * @nid:	target node
+ * @nodemask:	mask of other possible nodes
+ *
+ * Pages can be freed with a call to free_contig_pages(), or by manually
+ * calling __free_page() for each page allocated.
+ *
+ * Return: pointer to the first of @nr_pages contiguous pages on success,
+ * or NULL if not successful.
+ */
+struct page *find_alloc_contig_pages(unsigned long nr_pages, gfp_t gfp,
+					int nid, nodemask_t *nodemask)
+{
+	unsigned long i, alloc_order, order_pages;
+	struct page *pages;
+
+	/* get_count_order(0) yields -1, which is not a usable order */
+	if (!nr_pages)
+		return NULL;
+
+	/*
+	 * Underlying allocators perform page order sized allocations.
+	 */
+	alloc_order = get_count_order(nr_pages);
+	if (alloc_order < MAX_ORDER) {
+		pages = __alloc_pages_nodemask(gfp, (unsigned int)alloc_order,
+						nid, nodemask);
+		/* split_page() must not be handed a failed allocation */
+		if (pages)
+			split_page(pages, alloc_order);
+	} else {
+		pages = __alloc_contig_pages_nodemask(gfp, alloc_order, nid,
+							nodemask);
+	}
+
+	if (pages) {
+		/*
+		 * More pages than desired could have been allocated due to
+		 * rounding up to next page order.  Free any excess pages.
+		 */
+		order_pages = 1UL << alloc_order;
+		for (i = nr_pages; i < order_pages; i++)
+			__free_page(pages + i);
+	}
+
+	return pages;
+}
+EXPORT_SYMBOL_GPL(find_alloc_contig_pages);
+
+/**
+ * free_contig_pages() -- free pages allocated by find_alloc_contig_pages()
+ * @page:	first page of the contiguous range
+ * @nr_pages:	number of pages in the range
+ */
+void free_contig_pages(struct page *page, unsigned long nr_pages)
+{
+	free_contig_range(page_to_pfn(page), nr_pages);
+}
+EXPORT_SYMBOL_GPL(free_contig_pages);
 #endif
 
 #if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
-- 
2.13.6

--
To unsubscribe from this list: send the line "unsubscribe linux-api" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html