Hi Thomas, Thanks for reviewing! On 04/26/17 at 07:49am, Thomas Garnier wrote: > On Wed, Apr 26, 2017 at 3:43 AM, Baoquan He <bhe@xxxxxxxxxx> wrote: > > > arch/x86/platform/efi/efi_64.c | 35 +++++++++++++++++++++++++++-------- > > > 1 file changed, 27 insertions(+), 8 deletions(-) > > > > > > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c > > > index 2ee7694..2e7baff 100644 > > > --- a/arch/x86/platform/efi/efi_64.c > > > +++ b/arch/x86/platform/efi/efi_64.c > > > @@ -71,11 +71,12 @@ static void __init early_code_mapping_set_exec(int executable) > > > > > > pgd_t * __init efi_call_phys_prolog(void) > > > { > > > - unsigned long vaddress; > > > + unsigned long vaddr, left_vaddr; > > > + unsigned int num_entries; > > > pgd_t *save_pgd; > > > - > > > - int pgd; > > > + pud_t *pud, *pud_k; > > > int n_pgds; > > > + int i; > > > > > > if (!efi_enabled(EFI_OLD_MEMMAP)) { > > > save_pgd = (pgd_t *)read_cr3(); > > > @@ -88,10 +89,22 @@ pgd_t * __init efi_call_phys_prolog(void) > > > n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); > > > save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); > > > > > > - for (pgd = 0; pgd < n_pgds; pgd++) { > > > - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); > > > - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); > > > - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); > > > + for (i = 0; i < n_pgds; i++) { > > > + save_pgd[i] = *pgd_offset_k(i * PGDIR_SIZE); > > > + > > > + vaddr = (unsigned long)__va(i * PGDIR_SIZE); > > > + pud = pud_alloc_one(NULL, 0); > > Please check if pud is NULL. I considered it a while. I didn't check because I thought it's still in kernel init stage, and at most 128 page frames are cost for 64TB, namely 512KB. If kernel can't give 512KB at this time, it will die soon. I would like to hear what people are suggesting. Since you have pointed it out, I will add checking here. However I think we can keep those allocated page and try our best to build as much ident mapping as possible. E.g if we have 10TB memory, but failed to allocate page for 11th pud table, we can break the for loop, leave those built ident mapping there since efi region could be located inside those 0~5TB region. Then inefi_call_phys_epilog() only free these allocated pud tables in efi_call_phys_prolog, check and avoid freeing those pud tables from direct mapping which still existed because of allocation failure in efi_call_phys_prolog. diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2e7baff..67920d4 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -94,6 +94,11 @@ pgd_t * __init efi_call_phys_prolog(void) vaddr = (unsigned long)__va(i * PGDIR_SIZE); pud = pud_alloc_one(NULL, 0); + if (!pud) { + pr_err("Failed to allocate page for %d-th pud table " + "to build 1:1 mapping!\n", i); + break; + } num_entries = PTRS_PER_PUD - pud_index(vaddr); pud_k = pud_offset(pgd_offset_k(vaddr), vaddr); @@ -132,8 +137,10 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); - pud = (pud_t *)pgd_page_vaddr(*pgd); - pud_free(NULL, pud); + if (*pgd != save_pgd[pgd_idx]) { + pud = (pud_t *)pgd_page_vaddr(*pgd); + pud_free(NULL, pud); + } set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); } > > > > + > > > + num_entries = PTRS_PER_PUD - pud_index(vaddr); > > > + pud_k = pud_offset(pgd_offset_k(vaddr), vaddr); > > > + memcpy(pud, pud_k, num_entries); > > > + if (pud_index(vaddr) > 0) { > > You are using pud_index(vaddr) 3 times, might be worth using a local variable. Sure, will do, thanks. > > > > + left_vaddr = vaddr + (num_entries * PUD_SIZE); > > > + pud_k = pud_offset(pgd_offset_k(left_vaddr), > > > + left_vaddr); > > > + memcpy(pud + num_entries, pud_k, pud_index(vaddr)); > > I think this section (or the overall for loop) would benefit with a > comment explaining explaining why you are shifting the new PUD like > this. Will write a paragraph. > > > > + } > > > + pgd_populate(NULL, pgd_offset_k(i * PGDIR_SIZE), pud); > > > } > > > out: > > > __flush_tlb_all(); > > > @@ -106,6 +119,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) > > > */ > > > int pgd_idx; > > > int nr_pgds; > > > + pud_t *pud; > > > + pgd_t *pgd; > > > > > > if (!efi_enabled(EFI_OLD_MEMMAP)) { > > > write_cr3((unsigned long)save_pgd); > > > @@ -115,8 +130,12 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) > > > > > > nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); > > > > > > - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) > > > + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { > > > + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); > > > + pud = (pud_t *)pgd_page_vaddr(*pgd); > > > + pud_free(NULL, pud); > > > set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); > > > + } > > > > > > kfree(save_pgd); > > > > > > -- > > > 2.5.5 > > > > > > > > -- > Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-efi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html