Add mailing list: linux-riscv@xxxxxxxxxxxxxxxxxxx,linux-kernel@xxxxxxxxxxxxxxx,linux-mm@xxxxxxxxx On Wed, Jul 24, 2024 at 10:22 PM Yunhui Cui <cuiyunhui@xxxxxxxxxxxxx> wrote: > > When establishing a linear mapping, the virtual address is obtained > through __va(). If the physical address is too large, such as 1TB, then > the virtual address will overflow in the address space of sv39. > The log is as follows: > [ 0.000000] Unable to handle kernel paging request at virtual address 000000d97fdf7ad8 > [ 0.000000] [000000d97fdf7ad8] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801 > [ 0.000000] Unable to handle kernel paging request at virtual address 000000d97fdfaff0 > [ 0.000000] [000000d97fdfaff0] pgd=000000407ff7e801, p4d=000000407ff7e801, pud=000000407ff7e801 > ... > [ 0.000000] Insufficient stack space to handle exception! > [ 0.000000] Task stack: [0xffffffff81400000..0xffffffff81404000] > [ 0.000000] Overflow stack: [0xffffffff80c67370..0xffffffff80c68370] > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G W 6.6.3-00133-g60497fad461d-dirty #71 > [ 0.000000] epc : die_kernel_fault+0x158/0x1c8 > [ 0.000000] ra : die_kernel_fault+0x12a/0x1c8 > [ 0.000000] epc : ffffffff808cde36 ra : ffffffff808cde08 sp : ffffffff813fff80 > [ 0.000000] gp : ffffffff815a1678 tp : 0000000000000000 t0 : 0000003130386537 > [ 0.000000] t1 : 0000000000000031 t2 : 6537666637303430 s0 : ffffffff813fffc0 > [ 0.000000] s1 : ffffffff815b0b28 a0 : 0000000000000016 a1 : ffffffff81495298 > [ 0.000000] a2 : 0000000000000010 a3 : ffffffff81495298 a4 : 00000000000001fe > [ 0.000000] a5 : 000000d97fdfa000 a6 : ffffffff814250d0 a7 : 0000000000000030 > [ 0.000000] s2 : 000000d97fdfaff0 s3 : ffffffff81400040 s4 : 000000d97fdfaff0 > [ 0.000000] s5 : ffffffff815a0ed0 s6 : 0000000000000000 s7 : 000000008f604390 > [ 0.000000] s8 : 0000000000000000 s9 : ffffffffffffffff s10: 0000000000000000 > [ 0.000000] s11: 0000000000000000 t3 : ffffffff815baa9b t4 : ffffffff815baa9b > [ 0.000000] t5 : 
ffffffff815baa88 t6 : ffffffff813ffda8 > [ 0.000000] status: 0000000200000100 badaddr: 000000d97fdfaff0 cause: 000000000000000d > [ 0.000000] Kernel panic - not syncing: Kernel stack overflow > [ 0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G W 6.6.3-00133-g60497fad461d-dirty #71 > [ 0.000000] Call Trace: > [ 0.000000] [<ffffffff800066bc>] dump_backtrace+0x28/0x30 > [ 0.000000] [<ffffffff808cdac8>] show_stack+0x38/0x44 > [ 0.000000] [<ffffffff808d9d40>] dump_stack_lvl+0x44/0x5c > [ 0.000000] [<ffffffff808d9d70>] dump_stack+0x18/0x20 > [ 0.000000] [<ffffffff808cdfb6>] panic+0x110/0x2f2 > [ 0.000000] [<ffffffff80006532>] walk_stackframe+0x0/0x120 > [ 0.000000] [<ffffffff808cde08>] die_kernel_fault+0x12a/0x1c8 > [ 0.000000] ---[ end Kernel panic - not syncing: Kernel stack overflow ]--- > > In other words, the maximum value of the physical address needs to meet > Documentation/riscv/vm-layout.rst to ensure that there is no overflow. > For sv48/57, the actual virtual address space is huge, so this problem > is generally not triggered, but it is also checked in the code. > > We give a warning for the overflowed physical address region and reserve it > so that the kernel can boot up successfully. 
> > Signed-off-by: Yunhui Cui <cuiyunhui@xxxxxxxxxxxxx> > --- > arch/riscv/include/asm/page.h | 9 +++++++ > arch/riscv/include/asm/pgtable.h | 1 + > arch/riscv/kernel/setup.c | 1 + > arch/riscv/mm/init.c | 44 ++++++++++++++++++++++++++++++++ > include/linux/memblock.h | 5 ++++ > mm/memblock.c | 5 ---- > 6 files changed, 60 insertions(+), 5 deletions(-) > > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h > index 235fd45d998d..60c3db47c6d8 100644 > --- a/arch/riscv/include/asm/page.h > +++ b/arch/riscv/include/asm/page.h > @@ -38,6 +38,15 @@ > */ > #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) > #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) > + > +/* > + * See vm-layout.rst, the size of L3 direct mapping of all physical > + * memory 124GB, L4 is 64TB, L5 is 32PB. > + */ > +#define MAX_PFN_MEM_ADDR_L5 (0x80000000000000ULL) > +#define MAX_PFN_MEM_ADDR_L4 (0x400000000000ULL) > +#define MAX_PFN_MEM_ADDR_L3 (0x1F00000000ULL) > + > #else > #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) > #endif /* CONFIG_64BIT */ > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 089f3c9f56a3..b4ff4791e1dc 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -947,6 +947,7 @@ extern uintptr_t _dtb_early_pa; > #endif /* CONFIG_XIP_KERNEL */ > extern u64 satp_mode; > > +void paging_check(void); > void paging_init(void); > void misc_mem_init(void); > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > index 4f73c0ae44b2..24fedaa7ae93 100644 > --- a/arch/riscv/kernel/setup.c > +++ b/arch/riscv/kernel/setup.c > @@ -259,6 +259,7 @@ void __init setup_arch(char **cmdline_p) > parse_early_param(); > > efi_init(); > + paging_check(); > paging_init(); > > /* Parse the ACPI tables for possible boot-time configuration */ > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index bfa2dea95354..1c475c1d4c1b 100644 > --- a/arch/riscv/mm/init.c > +++ 
b/arch/riscv/mm/init.c > @@ -1385,6 +1385,50 @@ static void __init arch_reserve_crashkernel(void) > low_size, high); > } > > +static inline bool phymem_addr_overflow(phys_addr_t start, phys_addr_t end) > +{ > + if (pgtable_l5_enabled) { > + if (start > MAX_PFN_MEM_ADDR_L5 || end > MAX_PFN_MEM_ADDR_L5) > + goto out; > + } > + if (pgtable_l4_enabled) { > + if (start > MAX_PFN_MEM_ADDR_L4 || end > MAX_PFN_MEM_ADDR_L4) > + goto out; > + } > + if (start > MAX_PFN_MEM_ADDR_L3 || end > MAX_PFN_MEM_ADDR_L3) > + goto out; > + > + return false; > + > +out: > + WARN(true, "Physical memory address overflowed!"); > + return true; > +} > + > +static void __init phymem_check(struct memblock_type *type) > +{ > + phys_addr_t base, end, size; > + int idx; > + struct memblock_region *rgn; > + > + for_each_memblock_type(idx, type, rgn) { > + base = rgn->base; > + size = rgn->size; > + end = base + size - 1; > + > + if (phymem_addr_overflow(base, end)) { > + pr_warn("Region: [0x%llx-0x%llx] reserved.", base, end); > + memblock_reserve(base, size); > + } > + } > +} > + > +void __init paging_check(void) > +{ > + phymem_check(&memblock.reserved); > + phymem_check(&memblock.memory); > +} > + > void __init paging_init(void) > { > setup_bootmem(); > diff --git a/include/linux/memblock.h b/include/linux/memblock.h > index fc4d75c6cec3..36a38c326b7a 100644 > --- a/include/linux/memblock.h > +++ b/include/linux/memblock.h > @@ -251,6 +251,11 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, > __for_each_mem_range(i, &memblock.reserved, NULL, NUMA_NO_NODE, \ > MEMBLOCK_NONE, p_start, p_end, NULL) > > +#define for_each_memblock_type(i, memblock_type, rgn) \ > + for (i = 0, rgn = &memblock_type->regions[0]; \ > + i < memblock_type->cnt; \ > + i++, rgn = &memblock_type->regions[i]) > + > static inline bool memblock_is_hotpluggable(struct memblock_region *m) > { > return m->flags & MEMBLOCK_HOTPLUG; > diff --git a/mm/memblock.c b/mm/memblock.c > index 
3b9dc2d89b8a..f992050093f1 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -141,11 +141,6 @@ struct memblock_type physmem = { > */ > static __refdata struct memblock_type *memblock_memory = &memblock.memory; > > -#define for_each_memblock_type(i, memblock_type, rgn) \ > - for (i = 0, rgn = &memblock_type->regions[0]; \ > - i < memblock_type->cnt; \ > - i++, rgn = &memblock_type->regions[i]) > - > #define memblock_dbg(fmt, ...) \ > do { \ > if (memblock_debug) \ > -- > 2.39.2 > Thanks, Yunhui