On Mon, Dec 09, 2013 at 05:42:22PM +0800, Dave Young wrote: > Add a new setup_data type SETUP_EFI for kexec use. > Passing the saved fw_vendor, runtime, config tables and efi runtime mappings. > > When entering virtual mode, directly mapping the efi runtime ragions which > we passed in previously. And skip the step to call SetVirtualAddressMap. > > Specially for HP z420 workstation we need save the smbios physical address. > The kernel boot sequence proceeds in the following order. Step 2 > requires efi.smbios to be the physical address. However, I found that on > HP z420 EFI system table has a virtual address of SMBIOS in step 1. Hence, > we need set it back to the physical address with the smbios in > efi_setup_data. (When it is still the physical address, it simply sets > the same value.) > > 1. efi_init() - Set efi.smbios from EFI system table > 2. dmi_scan_machine() - Temporary map efi.smbios to access SMBIOS table > 3. efi_enter_virtual_mode() - Map EFI ranges > > Tested on ovmf+qemu, lenovo thinkpad, a dell laptop and an > HP z420 workstation. > > v2: refresh based on previous patch changes, code cleanup. > v3: use ioremap instead of phys_to_virt for efi_setup > v5: improve some code structure per comments from Matt > Boris: improve code structure, spell fix, etc. > Improve changelog from Toshi. > change the variable efi_setup to the physical address of efi setup_data > instead of the ioremapped virt address > > Signed-off-by: Dave Young <dyoung at redhat.com> > --- > arch/x86/include/asm/efi.h | 11 ++ > arch/x86/include/uapi/asm/bootparam.h | 1 + > arch/x86/kernel/setup.c | 3 + > arch/x86/platform/efi/efi.c | 195 ++++++++++++++++++++++++++++++---- > 4 files changed, 187 insertions(+), 23 deletions(-) ... > @@ -115,6 +116,25 @@ static int __init setup_storage_paranoia(char *arg) > } > early_param("efi_no_storage_paranoia", setup_storage_paranoia); > > +void __init parse_efi_setup(u64 phys_addr) > +{ > + struct setup_data *sd; > + > + if (!efi_enabled(EFI_64BIT)) { > + pr_warn("SETUP_EFI not supported on 32-bit\n"); > + return; > + } Shouldn't this function be in two versions in efi_64.c and efi_32.c? This way you don't need this check with cryptic printk message. > + > + sd = early_memremap(phys_addr, sizeof(struct setup_data)); > + if (!sd) { > + pr_warn("efi: early_memremap setup_data failed\n"); > + return; > + } > + efi_setup = phys_addr + sizeof(struct setup_data); > + nr_efi_runtime_map = (sd->len - sizeof(struct efi_setup_data)) / > + sizeof(efi_memory_desc_t); > + early_memunmap(sd, sizeof(struct setup_data)); > +} > > static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) > { > @@ -494,18 +514,28 @@ static int __init efi_systab_init(void *phys) > { > if (efi_enabled(EFI_64BIT)) { > efi_system_table_64_t *systab64; > + struct efi_setup_data *data = NULL; > u64 tmp = 0; > > + if (efi_setup) { > + data = early_memremap(efi_setup, sizeof(*data)); > + if (!data) > + return -ENOMEM; > + } > systab64 = early_memremap((unsigned long)phys, > sizeof(*systab64)); > if (systab64 == NULL) { > pr_err("Couldn't map the system table!\n"); > + if (data) > + early_memunmap(data, sizeof(*data)); > return -ENOMEM; > } > > efi_systab.hdr = systab64->hdr; > - efi_systab.fw_vendor = systab64->fw_vendor; > - tmp |= systab64->fw_vendor; > + > + efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : > + systab64->fw_vendor; > + tmp |= efi_systab.fw_vendor; > efi_systab.fw_revision = systab64->fw_revision; > efi_systab.con_in_handle = systab64->con_in_handle; > tmp |= systab64->con_in_handle; > @@ -519,15 +549,20 @@ static int __init efi_systab_init(void *phys) > tmp |= systab64->stderr_handle; > efi_systab.stderr = systab64->stderr; > tmp |= systab64->stderr; > - efi_systab.runtime = (void *)(unsigned long)systab64->runtime; > - tmp |= systab64->runtime; > + efi_systab.runtime = data ? > + (void *)(unsigned long)data->runtime : > + (void *)(unsigned long)systab64->runtime; > + tmp |= (unsigned long)efi_systab.runtime; > efi_systab.boottime = (void *)(unsigned long)systab64->boottime; > tmp |= systab64->boottime; > efi_systab.nr_tables = systab64->nr_tables; > - efi_systab.tables = systab64->tables; > - tmp |= systab64->tables; > + efi_systab.tables = data ? (unsigned long)data->tables : > + systab64->tables; > + tmp |= efi_systab.tables; > > early_memunmap(systab64, sizeof(*systab64)); > + if (data) > + early_memunmap(data, sizeof(*data)); > #ifdef CONFIG_X86_32 > if (tmp >> 32) { > pr_err("EFI data located above 4GB, disabling EFI.\n"); > @@ -631,6 +666,61 @@ static int __init efi_memmap_init(void) > return 0; > } > > +/* > + * For kexec kernel there's some special config table entries which could be > + * converted to virtual addresses after entering virtual mode. In kexec kernel > + * we need the physical addresses instead, thus passing them via setup_data > + * and update the entries to physical addresses in this function. Rewrite: "A number of config table entries get remapped to virtual addresses after entering EFI virtual mode. However, the kexec kernel requires their physical addresses therefore we pass them via setup_data and correct those entries to their respective physical addresses here." > + * > + * Currently only handles smbios which is necessary for HP z420. Didn't we say that this behavior is coming from a generic UEFI fw implementation and if so, no need to mention z420? > + */ > +static int __init efi_reuse_config(u64 tables, int nr_tables) > +{ > + int i, sz, ret = 0; > + void *p, *tablep; > + struct efi_setup_data *data; > + > + if (!efi_setup) > + return 0; > + > + if (!efi_enabled(EFI_64BIT)) > + return 0; > + > + data = early_memremap(efi_setup, sizeof(*data)); > + if (!data) { > + ret = -ENOMEM; > + goto out; > + } > + > + if (!data->smbios) > + goto out_memremap; > + > + sz = sizeof(efi_config_table_64_t); > + > + p = tablep = early_memremap(tables, nr_tables * sz); > + if (!p) { > + pr_err("Could not map Configuration table!\n"); > + ret = -ENOMEM; > + goto out_memremap; > + } > + > + for (i = 0; i < efi.systab->nr_tables; i++) { > + efi_guid_t guid; > + > + guid = ((efi_config_table_64_t *)p)->guid; > + > + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) > + ((efi_config_table_64_t *)p)->table = data->smbios; > + p += sz; > + } > + early_memunmap(tablep, nr_tables * sz); > + > +out_memremap: > + early_memunmap(data, sizeof(*data)); > +out: > + return ret; > +} > + > void __init efi_init(void) > { > efi_char16_t *c16; > @@ -676,6 +766,8 @@ void __init efi_init(void) > efi.systab->hdr.revision >> 16, > efi.systab->hdr.revision & 0xffff, vendor); > > + efi_reuse_config(efi.systab->tables, efi.systab->nr_tables); > + > if (efi_config_init(arch_tables)) > return; > > @@ -886,6 +978,50 @@ out_krealloc: > } > > /* > + * Map efi regions which was passed via setup_data. The virt_addr is a fixed were > + * addr which was used in first kernel in case kexec boot. ^ of a > + */ > +static int __init map_regions_fixed(void) > +{ > + int i, s, ret = 0; > + u64 end, systab; > + unsigned long size; > + efi_memory_desc_t *md; > + struct efi_setup_data *data; > + > + s = sizeof(*data) + nr_efi_runtime_map * sizeof(data->map[0]); > + data = early_memremap(efi_setup, s); > + if (!data) { > + ret = -ENOMEM; > + goto out; > + } newline. > + for (i = 0, md = data->map; i < nr_efi_runtime_map; i++, md++) { > + efi_map_region_fixed(md); /* FIXME: add error handling */ > + size = md->num_pages << PAGE_SHIFT; > + end = md->phys_addr + size; > + > + systab = (u64) (unsigned long) efi_phys.systab; > + if (md->phys_addr <= systab && systab < end) { > + systab += md->virt_addr - md->phys_addr; > + efi.systab = (efi_system_table_t *)(unsigned long)systab; > + } > + ret = save_runtime_map(md, i); Wait a minute, this is executed in the second, kexec-ed kernel, right? Why do we need to save the map there too? > + if (ret) > + goto out_save_runtime; > + } > + > + early_memunmap(data, s); > + return 0; > + > +out_save_runtime: > + kfree(efi_runtime_map); > + nr_efi_runtime_map = 0; > + early_memunmap(data, s); > +out: > + return ret; > +} > + > +/* > * This function will switch the EFI runtime services to virtual mode. > * Essentially, we look through the EFI memmap and map every region that > * has the runtime attribute bit set in its memory descriptor into the > @@ -901,12 +1037,16 @@ out_krealloc: > * so that we're in a different address space when calling a runtime > * function. For function arguments passing we do copy the PGDs of the > * kernel page table into ->trampoline_pgd prior to each call. > + * > + * Specially for kexec boot, efi runtime maps in previous kernel should > + * be passed in via setup_data. In that case runtime ranges will be mapped > + * to the same virtual addresses exactly same as the ones in previous kernel. "... to the same ..exactly same as ... " sounds funny. What's wrong with "... to the same virtual addresses as the first kernel." or if you really insist on "exact": "... to the same exact virtual addresses as the first kernel." > */ > void __init efi_enter_virtual_mode(void) > { -- Regards/Gruss, Boris. Sent from a fat crate under my desk. Formatting is fine. --