[PATCH v1 04/11] mm/memremap: add ZONE_DEVICE support for compound pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a new align property for struct dev_pagemap which specifies that a
pagemap is composed of a set of compound pages of size @align, instead
of base pages. When these pages are initialised, most are initialised as
tail pages instead of order-0 pages.

For certain ZONE_DEVICE users like device-dax which have a fixed page
size, this creates an opportunity to optimize GUP and GUP-fast walkers,
treating it the same way as THP or hugetlb pages.

Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
---
 include/linux/memremap.h | 13 +++++++++++++
 mm/memremap.c            |  8 ++++++--
 mm/page_alloc.c          | 24 +++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index b46f63dcaed3..bb28d82dda5e 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -114,6 +114,7 @@ struct dev_pagemap {
 	struct completion done;
 	enum memory_type type;
 	unsigned int flags;
+	unsigned long align;
 	const struct dev_pagemap_ops *ops;
 	void *owner;
 	int nr_range;
@@ -130,6 +131,18 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
 	return NULL;
 }
 
+static inline unsigned long pgmap_align(struct dev_pagemap *pgmap)
+{
+	if (!pgmap || !pgmap->align)
+		return PAGE_SIZE;
+	return pgmap->align;
+}
+
+static inline unsigned long pgmap_pfn_align(struct dev_pagemap *pgmap)
+{
+	return PHYS_PFN(pgmap_align(pgmap));
+}
+
 #ifdef CONFIG_ZONE_DEVICE
 bool pfn_zone_device_reserved(unsigned long pfn);
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
diff --git a/mm/memremap.c b/mm/memremap.c
index 805d761740c4..d160853670c4 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -318,8 +318,12 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
 	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 				PHYS_PFN(range->start),
 				PHYS_PFN(range_len(range)), pgmap);
-	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
-			- pfn_first(pgmap, range_id));
+	if (pgmap_align(pgmap) > PAGE_SIZE)
+		percpu_ref_get_many(pgmap->ref, (pfn_end(pgmap, range_id)
+			- pfn_first(pgmap, range_id)) / pgmap_pfn_align(pgmap));
+	else
+		percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
+				- pfn_first(pgmap, range_id));
 	return 0;
 
 err_add_memory:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 58974067bbd4..3a77f9e43f3a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6285,6 +6285,8 @@ void __ref memmap_init_zone_device(struct zone *zone,
 	unsigned long pfn, end_pfn = start_pfn + nr_pages;
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	struct vmem_altmap *altmap = pgmap_altmap(pgmap);
+	unsigned int pfn_align = pgmap_pfn_align(pgmap);
+	unsigned int order_align = order_base_2(pfn_align);
 	unsigned long zone_idx = zone_idx(zone);
 	unsigned long start = jiffies;
 	int nid = pgdat->node_id;
@@ -6302,10 +6304,30 @@ void __ref memmap_init_zone_device(struct zone *zone,
 		nr_pages = end_pfn - start_pfn;
 	}
 
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pfn_align) {
 		struct page *page = pfn_to_page(pfn);
+		unsigned long i;
 
 		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
+
+		if (pfn_align == 1)
+			continue;
+
+		__SetPageHead(page);
+
+		for (i = 1; i < pfn_align; i++) {
+			__init_zone_device_page(page + i, pfn + i, zone_idx,
+						nid, pgmap);
+			prep_compound_tail(page, i);
+
+			/*
+			 * The first and second tail pages need to
+			 * initialized first, hence the head page is
+			 * prepared last.
+			 */
+			if (i == 2)
+				prep_compound_head(page, order_align);
+		}
 	}
 
 	pr_info("%s initialised %lu pages in %ums\n", __func__,
-- 
2.17.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux