On 10/12/2017 04:06 PM, Anshuman Khandual wrote:
> On 10/12/2017 07:16 AM, Mike Kravetz wrote:
>> The following is a 'possible' way to add such functionality.  I just
>> did what was easy and pre-allocated contiguous pages which are used
>> to populate the mapping.  I did not use any of the higher order
>> allocators such as alloc_contig_range.  Therefore, it is limited to
>
> Just tried with a small prototype with an implementation similar to that
> of alloc_gigantic_page() where we scan the zones (applicable zonelist)
> for contiguous valid PFN range and try allocating with alloc_contig_range.
> Will share it soon.

With this patch on top of the series, I can allocate a little more than
twice (1UL << (MAX_ORDER - 1)) pages on POWER. But the problem is that the
allocatable amount keeps shrinking with every attempt until it reaches
1UL << (MAX_ORDER - 1). Will look into it.

diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 03c06ba..ce13b36 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -28,5 +28,6 @@
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_CONTIG	0x80000		/* back with contiguous pages */
 
 #endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index aee7917..4e6588d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1568,6 +1568,60 @@ struct mmap_arg_struct {
 }
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
+static bool is_pfn_range_valid(struct zone *z,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	unsigned long i, end_pfn = start_pfn + nr_pages;
+	struct page *page;
+
+	for (i = start_pfn; i < end_pfn; i++) {
+		if (!pfn_valid(i))
+			return false;
+
+		page = pfn_to_page(i);
+		if (page_zone(page) != z)
+			return false;
+
+		if (PageReserved(page))
+			return false;
+
+		if (page_count(page) > 0)
+			return false;
+
+		if (PageHuge(page))
+			return false;
+	}
+	return true;
+}
+
+struct page *
+alloc_pages_vma_contig(gfp_t gfp, int order, struct vm_area_struct *vma,
+		unsigned long addr, int node, bool hugepage)
+{
+	struct zonelist *zonelist = node_zonelist(node, gfp);
+	struct zoneref *z;
+	struct zone *zone;
+	unsigned long pfn, nr_pages, flags, ret;
+
+	nr_pages = 1 << order;
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp), NULL) {
+		spin_lock_irqsave(&zone->lock, flags);
+		pfn = ALIGN(zone->zone_start_pfn, nr_pages);
+		while (zone_spans_pfn(zone, pfn + nr_pages - 1)) {
+			if (is_pfn_range_valid(zone, pfn, nr_pages)) {
+				spin_unlock_irqrestore(&zone->lock, flags);
+				ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, gfp);
+				if (!ret)
+					return pfn_to_page(pfn);
+				spin_lock_irqsave(&zone->lock, flags);
+			}
+			pfn += nr_pages;
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+	return NULL;
+}
+
 /*
  * Attempt to allocate a contiguous range of pages to back the
  * specified vma.  vm_private_data is used as a 'pointer' to the
@@ -1588,11 +1642,19 @@ static long __alloc_vma_contig_range(struct vm_area_struct *vma)
 	 * allocations < MAX_ORDER in size.  However, this should really
 	 * handle arbitrary size allocations.
 	 */
+
+	/*
 	if (order >= MAX_ORDER)
 		return -ENOMEM;
 
-	vma->vm_private_data = alloc_pages_vma(gfp, order, vma, vma->vm_start,
-						numa_node_id(), false);
+	*/
+
+	if (order >= MAX_ORDER)
+		vma->vm_private_data = alloc_pages_vma_contig(gfp, order, vma,
+				vma->vm_start, numa_node_id(), false);
+	else
+		vma->vm_private_data = alloc_pages_vma(gfp, order, vma,
+				vma->vm_start, numa_node_id(), false);
 
 	if (!vma->vm_private_data)
 		return -ENOMEM;
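
For reference, here is a minimal userspace sketch of how a mapping backed
by this path could be requested. Only the MAP_CONTIG value comes from the
powerpc hunk above; the private anonymous flag combination, the size, and
the test program itself are assumptions for illustration, not part of the
patch.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_CONTIG
#define MAP_CONTIG	0x80000		/* value from the powerpc hunk above */
#endif

int main(void)
{
	/* Pick a size larger than MAX_ORDER - 1 pages on the target config. */
	size_t len = 16UL << 20;
	void *p;

	/* Ask for a private anonymous mapping backed by contiguous pages. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_CONTIG, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_CONTIG)");
		return 1;
	}

	/* Touch the range; the series pre-allocates the pages at mmap() time. */
	memset(p, 0, len);
	munmap(p, len);
	return 0;
}

Whether such a request goes through alloc_pages_vma_contig() or the regular
alloc_pages_vma() path depends on how the computed order compares with
MAX_ORDER, per the __alloc_vma_contig_range() change above.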