From: Zi Yan <ziy@xxxxxxxxxx> With the new buddy_alloc_max_order boot parameter, users can specify a larger MAX_ORDER than the one set in CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER. It can be set to any value >= CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER, but it cannot exceed S8_MAX (127): vmscan's scan_control stores the order in an s8 field, and the per-cpu free page list imposes its own upper limit. Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> Cc: Jonathan Corbet <corbet@xxxxxxx> Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx> Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: linux-doc@xxxxxxxxxxxxxxx Cc: linux-mm@xxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx --- .../admin-guide/kernel-parameters.txt | 5 +++ include/linux/mmzone.h | 8 +++++ mm/Kconfig | 13 +++++++ mm/page_alloc.c | 34 ++++++++++++++++++- mm/vmscan.c | 1 - 5 files changed, 59 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ec519225b671..0f71233ae396 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -494,6 +494,11 @@ bttv.pll= See Documentation/admin-guide/media/bttv.rst bttv.tuner= + buddy_alloc_max_order= [KNL] This parameter adjusts the size of the largest + pages that can be allocated from the kernel buddy allocator. The largest + page size is 2^buddy_alloc_max_order * PAGE_SIZE. + Format: integer + bulk_remove=off [PPC] This parameter disables the use of the pSeries firmware feature for flushing multiple hpte entries at a time. 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b5774e4c2700..90121d25d660 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,6 +35,14 @@ #define MIN_MAX_ORDER MAX_ORDER #endif +/* remap MAX_ORDER to buddy_alloc_max_order for boot time adjustment */ +#ifdef CONFIG_BOOT_TIME_MAX_ORDER +/* Defined in mm/page_alloc.c */ +extern int buddy_alloc_max_order; +#undef MAX_ORDER +#define MAX_ORDER buddy_alloc_max_order +#endif /* CONFIG_BOOT_TIME_MAX_ORDER */ + #define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) /* diff --git a/mm/Kconfig b/mm/Kconfig index e558f5679707..acccb919d72d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -455,6 +455,19 @@ config SET_MAX_ORDER increase this value. A value of 10 means that the largest free memory block is 2^10 pages. +config BOOT_TIME_MAX_ORDER + bool "Set maximum order of buddy allocator at boot time" + depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER != 0 || SET_MAX_ORDER != 0) + help + It enables users to set the maximum order of the buddy allocator at system + boot time instead of using a static macro fixed at compile time. Systems with + a lot of memory might want to allocate large pages, whereas that is much + less feasible and desirable for systems with less memory. This option + allows different systems to control the largest page they want to + allocate. By default, MAX_ORDER will be set to ARCH_FORCE_MAX_ORDER or + SET_MAX_ORDER, whichever is non-zero, when the buddy_alloc_max_order boot + parameter is not set. MAX_ORDER currently cannot exceed S8_MAX (127). 
+ config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 941a94bb8cf0..4c4d68da1922 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1581,7 +1581,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, order = pindex_to_order(pindex); nr_pages = 1 << order; - BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH)); + BUILD_BUG_ON(MIN_MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH)); do { int mt; @@ -9679,3 +9679,35 @@ bool has_managed_dma(void) return false; } #endif /* CONFIG_ZONE_DMA */ + +#ifdef CONFIG_BOOT_TIME_MAX_ORDER +int buddy_alloc_max_order = MIN_MAX_ORDER; +EXPORT_SYMBOL(buddy_alloc_max_order); + +static int __init buddy_alloc_set(char *val) +{ + int ret; + unsigned long max_order; + + ret = kstrtoul(val, 10, &max_order); + + if (ret < 0) + return -EINVAL; + + /* + * max_order is also limited at below locations: + * 1. scan_control in mm/vmscan.c uses s8 field for order, max_order cannot + * be bigger than S8_MAX before the field is changed. + * 2. free_pcppages_bulk has max_order upper limit. + */ + if (max_order > MIN_MAX_ORDER && max_order <= S8_MAX && + max_order <= (1<<NR_PCP_ORDER_WIDTH)) + buddy_alloc_max_order = max_order; + else + buddy_alloc_max_order = MIN_MAX_ORDER; + + return 0; +} + +early_param("buddy_alloc_max_order", buddy_alloc_set); +#endif /* CONFIG_BOOT_TIME_MAX_ORDER */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 06eeeae038dd..9d4fde8705d9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3816,7 +3816,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, * scan_control uses s8 fields for order, priority, and reclaim_idx. * Confirm they are large enough for max values. */ - BUILD_BUG_ON(MAX_ORDER > S8_MAX); BUILD_BUG_ON(DEF_PRIORITY > S8_MAX); BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX); -- 2.35.1