Problem: ======= On arm64, block and section mappings are supported when building page tables. However, the kernel currently forces base page mapping for the whole linear mapping if CONFIG_ZONE_DMA or CONFIG_ZONE_DMA32 is enabled and the crashkernel kernel parameter is set. This makes the linear mapping process take longer during bootup and causes severe performance degradation at run time. Root cause: ========== On arm64, crashkernel reservation relies on knowing the upper limit of the low memory zone, because it needs to reserve memory in that zone so that devices' DMA addressing in the kdump kernel can be satisfied. However, the limit on arm64 varies, and it can only be determined late, once bootmem_init() has been called. We also need to map the crashkernel region with base page granularity when doing the linear mapping, because kdump needs to protect the crashkernel region via set_memory_valid(,0) after the kdump kernel is loaded. However, arm64 does not handle splitting an already built block or section mapping well, due to some CPU restrictions [1]. And unfortunately, the linear mapping is done before bootmem_init(). To resolve the above conflict on arm64, the current compromise is to force base page mapping for the entire linear mapping if crashkernel is set and CONFIG_ZONE_DMA or CONFIG_ZONE_DMA32 is enabled. Hence performance is sacrificed. Solution: ========= To fix the problem, we should always take 4G as the crashkernel low memory end when CONFIG_ZONE_DMA or CONFIG_ZONE_DMA32 is enabled. With this, we no longer need to defer the crashkernel reservation until bootmem_init() is called to set arm64_dma_phys_limit. As soon as memblock init is done, we can determine the upper limit of the low memory zone.
1) Both CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 are disabled, or memblock_start_of_DRAM() > 4G: limit = PHYS_ADDR_MAX+1 (corner cases) 2) CONFIG_ZONE_DMA or CONFIG_ZONE_DMA32 is enabled: limit = 4G (generic case) [1] https://lore.kernel.org/all/YrIIJkhKWSuAqkCx@xxxxxxx/T/#u Signed-off-by: Baoquan He <bhe@xxxxxxxxxx> --- arch/arm64/mm/init.c | 24 ++++++++++++++---------- arch/arm64/mm/mmu.c | 38 ++++++++++++++++++++++---------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b9af30be813e..8ae55afdd11c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -90,10 +90,22 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; #endif +static phys_addr_t __init crash_addr_low_max(void) +{ + phys_addr_t low_mem_mask = U32_MAX; + phys_addr_t phys_start = memblock_start_of_DRAM(); + + if ((!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) || + (phys_start > U32_MAX)) + low_mem_mask = PHYS_ADDR_MAX; + + return min(low_mem_mask, memblock_end_of_DRAM() - 1) + 1; +} + /* Current arm64 boot protocol requires 2MB alignment */ #define CRASH_ALIGN SZ_2M -#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_LOW_MAX crash_addr_low_max() #define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) static int __init reserve_crashkernel_low(unsigned long long low_size) @@ -389,8 +401,7 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (!defer_reserve_crashkernel()) - reserve_crashkernel(); + reserve_crashkernel(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -434,13 +445,6 @@ void __init bootmem_init(void) */ dma_contiguous_reserve(arm64_dma_phys_limit); - /* - * request_standard_resources() depends on crashkernel's memory being - * reserved, so do it here.
- */ - if (defer_reserve_crashkernel()) - reserve_crashkernel(); - memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e7ad44585f40..cdd338fa2115 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -547,13 +547,12 @@ static void __init map_mem(pgd_t *pgdp) memblock_mark_nomap(kernel_start, kernel_end - kernel_start); #ifdef CONFIG_KEXEC_CORE - if (crash_mem_map) { - if (defer_reserve_crashkernel()) - flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - else if (crashk_res.end) - memblock_mark_nomap(crashk_res.start, - resource_size(&crashk_res)); - } + if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); + if (crashk_low_res.end) + memblock_mark_nomap(crashk_low_res.start, + resource_size(&crashk_low_res)); #endif /* map all the memory banks */ @@ -589,16 +588,23 @@ static void __init map_mem(pgd_t *pgdp) * through /sys/kernel/kexec_crash_size interface. */ #ifdef CONFIG_KEXEC_CORE - if (crash_mem_map && !defer_reserve_crashkernel()) { - if (crashk_res.end) { - __map_memblock(pgdp, crashk_res.start, - crashk_res.end + 1, - PAGE_KERNEL, - NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); - memblock_clear_nomap(crashk_res.start, - resource_size(&crashk_res)); - } + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); } + if (crashk_low_res.end) { + __map_memblock(pgdp, crashk_low_res.start, + crashk_low_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_low_res.start, + resource_size(&crashk_low_res)); + } + #endif } -- 2.34.1