On 10/12/2017 04:06 PM, Anshuman Khandual wrote:
> On 10/12/2017 07:16 AM, Mike Kravetz wrote:
>> The following is a 'possible' way to add such functionality.  I just
>> did what was easy and pre-allocated contiguous pages which are used
>> to populate the mapping.  I did not use any of the higher order
>> allocators such as alloc_contig_range.  Therefore, it is limited to
>
> Just tried with a small prototype with an implementation similar to that
> of alloc_gigantic_page() where we scan the zones (applicable zonelist)
> for contiguous valid PFN range and try allocating with alloc_contig_range.
> Will share it soon.

With this patch on top of the series, I can allocate a little more than
twice (1UL << (MAX_ORDER - 1)) pages on POWER. But the problem is that the
allocatable amount keeps shrinking with every attempt until it reaches
1UL << (MAX_ORDER - 1). Will look into it.

diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 03c06ba..ce13b36 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -28,5 +28,6 @@
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_CONTIG	0x80000		/* back with contiguous pages */
 
 #endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index aee7917..4e6588d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1568,6 +1568,60 @@ struct mmap_arg_struct {
 }
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
+static bool is_pfn_range_valid(struct zone *z,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	unsigned long i, end_pfn = start_pfn + nr_pages;
+	struct page *page;
+
+	for (i = start_pfn; i < end_pfn; i++) {
+		if (!pfn_valid(i))
+			return false;
+
+		page = pfn_to_page(i);
+		if (page_zone(page) != z)
+			return false;
+
+		if (PageReserved(page))
+			return false;
+
+		if (page_count(page) > 0)
+			return false;
+
+		if (PageHuge(page))
+			return false;
+	}
+	return true;
+}
+
+struct page *
+alloc_pages_vma_contig(gfp_t gfp, int order, struct vm_area_struct *vma,
+		unsigned long addr, int node, bool hugepage)
+{
+	struct zonelist *zonelist = node_zonelist(node, gfp);
+	struct zoneref *z;
+	struct zone *zone;
+	unsigned long pfn, nr_pages, flags, ret;
+
+	nr_pages = 1 << order;
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp), NULL) {
+		spin_lock_irqsave(&zone->lock, flags);
+		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
+		while (zone_spans_pfn(zone, pfn + nr_pages - 1)) {
+			if (is_pfn_range_valid(zone, pfn, nr_pages)) {
+				spin_unlock_irqrestore(&zone->lock, flags);
+				ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, gfp);
+				if (!ret)
+					return pfn_to_page(pfn);
+				spin_lock_irqsave(&zone->lock, flags);
+			}
+			pfn += nr_pages;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+	return NULL;
+}
+
 /*
  * Attempt to allocate a contiguous range of pages to back the
  * specified vma.  vm_private_data is used as a 'pointer' to the
@@ -1588,11 +1642,19 @@ static long __alloc_vma_contig_range(struct vm_area_struct *vma)
 	 * allocations < MAX_ORDER in size.  However, this should really
 	 * handle arbitrary size allocations.
 	 */
+
+	/*
 	if (order >= MAX_ORDER)
 		return -ENOMEM;
 
-	vma->vm_private_data = alloc_pages_vma(gfp, order, vma, vma->vm_start,
-						numa_node_id(), false);
+	*/
+
+	if (order >= MAX_ORDER)
+		vma->vm_private_data = alloc_pages_vma_contig(gfp, order, vma,
+				vma->vm_start, numa_node_id(), false);
+	else
+		vma->vm_private_data = alloc_pages_vma(gfp, order, vma,
+				vma->vm_start, numa_node_id(), false);
 
 	if (!vma->vm_private_data)
 		return -ENOMEM;
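
For reference, here is a minimal userspace sketch of how a mapping backed
by this path could be requested. Only the MAP_CONTIG value comes from the
powerpc hunk above; the private anonymous flag combination, the size, and
the test program itself are assumptions for illustration, not part of the
patch.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_CONTIG
#define MAP_CONTIG	0x80000		/* value from the powerpc hunk above */
#endif

int main(void)
{
	/* Pick a size larger than MAX_ORDER - 1 pages on the target config. */
	size_t len = 16UL << 20;
	void *p;

	/* Ask for a private anonymous mapping backed by contiguous pages. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_CONTIG, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_CONTIG)");
		return 1;
	}

	/* Touch the range; the series pre-allocates the pages at mmap() time. */
	memset(p, 0, len);
	munmap(p, len);
	return 0;
}

Whether such a request goes through alloc_pages_vma_contig() or the regular
alloc_pages_vma() path depends on how the computed order compares with
MAX_ORDER, per the __alloc_vma_contig_range() change above.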