Commit 8d57470d introduced a kernel panic when setting mem=2G at boot time, and commit c9b3234a6 turned the kernel panic into a hang. The root cause is the same in both: we are accessing a BAD address; that is, the mapping is broken. Here is a memory mapping range dumped at boot time: [mem 0x00000000-0x000fffff] page 4k (0) [mem 0x7fe00000-0x7fffffff] page 1G (1) [mem 0x7c000000-0x7fdfffff] page 1G (2) [mem 0x00100000-0x001fffff] page 4k (3) [mem 0x00200000-0x7bffffff] page 2M (4) We met no problems while setting the memory map for regions (0) to (3). But we set a PG_LEVEL_1G mapping for pud index 0x1 at (1), and the pud index comes to 0x1 as well while setting the 0x40000000-0x7bf00000 part of (4). What's more, that is a PG_LEVEL_2M mapping, which results in a splitting of the PG_LEVEL_1G mapping. This breaks the former mappings for (1) and (2). At the same time, because "end" is set to 0x7c000000, we miss the chance to fix it up in phys_pmd_init() at this code: if (address >= end) { .... continue; } Thus, using an extra flag to indicate that we are splitting a large PUD (or PMD), and changing the above if statement to the following, makes this issue go away: if (address >= end && !splitting) { ... } Reported-by: LKP <lkp@xxxxxxxxxxxxxxx> CC: For 3.9+ <stable@xxxxxxxxxxxxxxx> Cc: H. Peter Anvin <hpa@xxxxxxxxx> Cc: Yinghai Lu <yinghai@xxxxxxxxxx> Bisected-by: "Xie, ChanglongX" <changlongx.xie@xxxxxxxxx> Signed-off-by: Yuanhan Liu <yuanhan.liu@xxxxxxxxxxxxxxx> --- I reported this panic regression a long time ago, and I hadn't noticed the above panic->hang change before, which might have confused Yinghai when trying to understand what happened from the 2 logs I sent earlier (one is from 8d57470d, the other is from the HEAD commit at that time, which turned into a hang as stated). Moreover, it seems that Yinghai couldn't reproduce it, and I was busy with something else. I finally got a free day yesterday (and a good mood ;). Last, thanks to Changlong for his effort in bisecting the 2 commits above. 
--- arch/x86/mm/init_64.c | 51 +++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 43 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb00c46..e4c7038 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -401,7 +401,7 @@ void __init cleanup_highmap(void) static unsigned long __meminit phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, - pgprot_t prot) + pgprot_t prot, bool split_pmd) { unsigned long pages = 0, next; unsigned long last_map_addr = end; @@ -411,7 +411,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { next = (addr & PAGE_MASK) + PAGE_SIZE; - if (addr >= end) { + if (addr >= end && !split_pmd) { if (!after_bootmem && !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) @@ -446,7 +446,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, - unsigned long page_size_mask, pgprot_t prot) + unsigned long page_size_mask, pgprot_t prot, bool split_pud) { unsigned long pages = 0, next; unsigned long last_map_addr = end; @@ -457,9 +457,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pmd_t *pmd = pmd_page + pmd_index(address); pte_t *pte; pgprot_t new_prot = prot; + bool split_pmd = false; next = (address & PMD_MASK) + PMD_SIZE; - if (address >= end) { + if (address >= end && !split_pud) { if (!after_bootmem && !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) @@ -472,7 +473,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); last_map_addr = phys_pte_init(pte, address, - end, prot); + end, prot, 
split_pmd); spin_unlock(&init_mm.page_table_lock); continue; } @@ -495,6 +496,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, continue; } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); + split_pmd = true; } if (page_size_mask & (1<<PG_LEVEL_2M)) { @@ -509,7 +511,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, } pte = alloc_low_page(); - last_map_addr = phys_pte_init(pte, address, end, new_prot); + last_map_addr = phys_pte_init(pte, address, end, + new_prot, split_pmd); spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, pte); @@ -531,6 +534,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; + bool split_pud = false; next = (addr & PUD_MASK) + PUD_SIZE; if (addr >= end) { @@ -545,7 +549,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); last_map_addr = phys_pmd_init(pmd, addr, end, - page_size_mask, prot); + page_size_mask, prot, + split_pud); __flush_tlb_all(); continue; } @@ -568,6 +573,36 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, continue; } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); + /* + * We set page table in top-down now, which means we + * might have set a PG_LEVEL_1G mapping for a higher + * address. + * + * And in the meantime, here we meet the same PUD in + * a lower mem region and we are about to split it. + * Setting split_pud to make sure we will re-map + * former mapping as well. Or, we will just ignore + * it due to + * if (address >= end) { + * ... + * continue; + * } + * at phys_pmd_init(). 
+ * + * Example: here is one case I met: + * [mem 0x00000000-0x000fffff] page 4k (0) + * [mem 0x7fe00000-0x7fffffff] page 1G (1) + * [mem 0x7c000000-0x7fdfffff] page 1G (2) + * [mem 0x00100000-0x001fffff] page 4k (3) + * [mem 0x00200000-0x7bffffff] page 2M (4) + * + * Where mem 0x40000000 to mem 0x7fffffff will use same + * PUD, and we have set a PG_LEVEL_1G mapping at (1). + * While handling 0x40000000 - 0x7bf00000 part of (4), + * we will split PUD and break former mapping for (1) + * and (2) as stated above. + */ + split_pud = true; } if (page_size_mask & (1<<PG_LEVEL_1G)) { @@ -583,7 +618,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pmd = alloc_low_page(); last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, - prot); + prot, split_pud); spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, pmd); -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html