From: Zi Yan <ziy@xxxxxxxxxx> With the new buddy_alloc_max_order boot parameter, users can specify a larger MAX_ORDER than the one set by CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER. It can be set to any value >= CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER, but <= S8_MAX (127; limited by the s8 order field in vmscan scan_control and by the per-cpu free page list). Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> Cc: Jonathan Corbet <corbet@xxxxxxx> Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx> Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: linux-doc@xxxxxxxxxxxxxxx Cc: linux-mm@xxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx --- .../admin-guide/kernel-parameters.txt | 5 +++ include/linux/mmzone.h | 10 +++++- mm/Kconfig | 13 ++++++++ mm/page_alloc.c | 31 +++++++++++++++++++ mm/vmscan.c | 1 - 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5f633844daac..eb0dd8a78205 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -494,6 +494,11 @@ bttv.pll= See Documentation/admin-guide/media/bttv.rst bttv.tuner= + buddy_alloc_max_order= [KNL] This parameter adjusts the size of the largest + pages that can be allocated from the kernel buddy allocator. The largest + page size is 2^buddy_alloc_max_order * PAGE_SIZE. + Format: integer + bulk_remove=off [PPC] This parameter disables the use of the pSeries firmware feature for flushing multiple hpte entries at a time. 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 19fca391f635..5669191d15dc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,6 +35,14 @@ #define MIN_MAX_ORDER MAX_ORDER #endif +/* remap MAX_ORDER to buddy_alloc_max_order for boot time adjustment */ +#ifdef CONFIG_BOOT_TIME_MAX_ORDER +/* Defined in mm/page_alloc.c */ +extern int buddy_alloc_max_order; +#undef MAX_ORDER +#define MAX_ORDER buddy_alloc_max_order +#endif /* CONFIG_BOOT_TIME_MAX_ORDER */ + #define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) /* @@ -1600,7 +1608,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes) * contiguous, thus > section size pages can be allocated and manipulated * without worrying about non-contiguous struct page. */ -#ifndef CONFIG_SET_MAX_ORDER +#if !defined(CONFIG_SET_MAX_ORDER) && !defined(CONFIG_BOOT_TIME_MAX_ORDER) #if (MAX_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif diff --git a/mm/Kconfig b/mm/Kconfig index 9c7280acd528..3e6b61ba9fec 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -449,6 +449,19 @@ config SET_MAX_ORDER increase this value. A value of 10 means that the largest free memory block is 2^10 pages. +config BOOT_TIME_MAX_ORDER + bool "Set maximum order of buddy allocator at boot time" + depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER != 0 || SET_MAX_ORDER != 0) + help + This option lets users set the maximum order of the buddy allocator at + system boot time instead of using a static macro fixed at compilation time. + Systems with a lot of memory might want to allocate large pages, whereas it + is much less feasible and desirable for systems with less memory. This option + allows different systems to control the largest page they want to + allocate. By default, MAX_ORDER will be set to ARCH_FORCE_MAX_ORDER or + SET_MAX_ORDER, whichever is non-zero, when the buddy_alloc_max_order boot + parameter is not set. MAX_ORDER is currently limited to at most S8_MAX (127). 
+ config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ba7c284ba3d3..9eacdf3a37c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -9720,3 +9720,34 @@ bool has_managed_dma(void) return false; } #endif /* CONFIG_ZONE_DMA */ + +#ifdef CONFIG_BOOT_TIME_MAX_ORDER +int buddy_alloc_max_order = MIN_MAX_ORDER; +EXPORT_SYMBOL(buddy_alloc_max_order); + +static int __init buddy_alloc_set(char *val) +{ + int ret; + unsigned long max_order; + + ret = kstrtoul(val, 10, &max_order); + + if (ret < 0) + return -EINVAL; + + /* + * max_order is also limited at below locations: + * 1. scan_control in mm/vmscan.c uses s8 field for order, max_order cannot + * be bigger than S8_MAX before the field is changed. + * 2. free_pcppages_bulk has max_order upper limit. + */ + if (max_order > MIN_MAX_ORDER && max_order <= S8_MAX) + buddy_alloc_max_order = max_order; + else + buddy_alloc_max_order = MIN_MAX_ORDER; + + return 0; +} + +early_param("buddy_alloc_max_order", buddy_alloc_set); +#endif /* CONFIG_BOOT_TIME_MAX_ORDER */ diff --git a/mm/vmscan.c b/mm/vmscan.c index a8fd6300fa7e..009632243398 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6623,7 +6623,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, * scan_control uses s8 fields for order, priority, and reclaim_idx. * Confirm they are large enough for max values. */ - BUILD_BUG_ON(MAX_ORDER > S8_MAX); BUILD_BUG_ON(DEF_PRIORITY > S8_MAX); BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX); -- 2.35.1