From: Zi Yan <ziy@xxxxxxxxxx>

With the new buddy_alloc_max_order boot parameter, users can specify a
larger MAX_ORDER than the one set by CONFIG_ARCH_FORCE_MAX_ORDER or
CONFIG_SET_MAX_ORDER. It can be set to any value >=
CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER, but it must stay
below both S8_MAX (vmscan's scan_control stores the order in an s8) and
1 << NR_PCP_ORDER_WIDTH (per-cpu free page list), so in any case < 256.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: linux-doc@xxxxxxxxxxxxxxx
Cc: linux-mm@xxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
 .../admin-guide/kernel-parameters.txt         |  5 +++
 include/linux/mmzone.h                        | 23 +++++++++++--
 mm/page_alloc.c                               | 34 ++++++++++++++++++-
 mm/vmscan.c                                   |  1 -
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5c59a5fb17c3..a37141aa28ae 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -465,6 +465,11 @@
 	bttv.pll=	See Documentation/admin-guide/media/bttv.rst
 	bttv.tuner=
 
+	buddy_alloc_max_order=	[KNL] This parameter adjusts the size of the largest
+			pages that can be allocated from the kernel buddy allocator. The
+			largest page size is 2^(buddy_alloc_max_order - 1) * PAGE_SIZE.
+			Format: integer
+
 	bulk_remove=off	[PPC]  This parameter disables the use of the pSeries
 			firmware feature for flushing multiple hpte entries
 			at a time.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 379dada82d4b..9ca4d59722a1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -26,14 +26,25 @@
 /* Free memory management - zoned buddy allocator. 
*/
 #ifndef CONFIG_ARCH_FORCE_MAX_ORDER
 #ifdef CONFIG_SET_MAX_ORDER
-#define MAX_ORDER CONFIG_SET_MAX_ORDER
+/* Defined in mm/page_alloc.c */
+extern int buddy_alloc_max_order;
+
+#define MAX_ORDER buddy_alloc_max_order
 #define MIN_MAX_ORDER CONFIG_SET_MAX_ORDER
 #else
 #define MAX_ORDER 11
 #define MIN_MAX_ORDER MAX_ORDER
 #endif /* CONFIG_SET_MAX_ORDER */
 #else
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/* Defined in mm/page_alloc.c */
+extern int buddy_alloc_max_order;
+
+#define MAX_ORDER buddy_alloc_max_order
+#else
 #define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 #define MIN_MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
 #endif /* CONFIG_ARCH_FORCE_MAX_ORDER */
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
@@ -1557,8 +1568,14 @@ void sparse_init(void);
  * pfn_valid_within() should be used in this case; we optimise this away
  * when we have no holes within a MAX_ORDER_NR_PAGES block.
  */
-#if ((MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
-#define pfn_valid_within(pfn) pfn_valid(pfn)
+#if defined(CONFIG_ARCH_FORCE_MAX_ORDER) || defined(CONFIG_SET_MAX_ORDER)
+static inline bool pfn_valid_within(unsigned long pfn)
+{
+	/* MAX_ORDER may be a runtime variable here, so test it at run time. */
+	if ((MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+		return pfn_valid(pfn);
+	return true;
+}
 #else
 #define pfn_valid_within(pfn) (1)
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bfa6962f7615..ea6f8d85a4cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1487,7 +1487,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			batch_free = count;
 
 		order = pindex_to_order(pindex);
-		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
 		do {
 			page = list_last_entry(list, struct page, lru);
 			/* must delete to avoid corrupting pcp list */
@@ -9508,3 +9507,36 @@ bool take_page_off_buddy(struct page *page)
 	return ret;
 }
 #endif
+
+#if (defined(CONFIG_ARCH_FORCE_MAX_ORDER) && defined(CONFIG_SPARSEMEM_VMEMMAP)) \
+	|| defined(CONFIG_SET_MAX_ORDER)
+int buddy_alloc_max_order = MIN_MAX_ORDER;
+EXPORT_SYMBOL(buddy_alloc_max_order);
+
+/*
+ * Parse "buddy_alloc_max_order=<n>". val is NULL when "=<n>" is omitted.
+ * Returns -EINVAL if <n> is missing or unparsable, 0 otherwise.
+ */
+static int __init buddy_alloc_set(char *val)
+{
+	unsigned long max_order;
+
+	if (!val || kstrtoul(val, 10, &max_order))
+		return -EINVAL;
+
+	/*
+	 * max_order is also limited at below locations:
+	 * 1. scan_control in mm/vmscan.c uses s8 field for order, max_order cannot
+	 * be bigger than S8_MAX before the field is changed.
+	 * 2. free_pcppages_bulk has max_order upper limit.
+	 */
+	if (max_order > MIN_MAX_ORDER && max_order < S8_MAX &&
+	    max_order < (1<<NR_PCP_ORDER_WIDTH))
+		buddy_alloc_max_order = max_order;
+	else
+		buddy_alloc_max_order = MIN_MAX_ORDER;
+
+	return 0;
+}
+early_param("buddy_alloc_max_order", buddy_alloc_set);
+#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 403a175a720f..9a3963c6166e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3610,7 +3610,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 	 * scan_control uses s8 fields for order, priority, and reclaim_idx.
 	 * Confirm they are large enough for max values.
 	 */
-	BUILD_BUG_ON(MAX_ORDER > S8_MAX);
 	BUILD_BUG_ON(DEF_PRIORITY > S8_MAX);
 	BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX);
 
-- 
2.30.2