On Thu, 19 Sep 2019 at 18:06, Kairui Song <kasong@xxxxxxxxxx> wrote: > > Currently, kernel fails to boot on some HyperV VMs when using EFI. > And it's a potential issue on all platforms. > > It's caused a broken kernel relocation on EFI systems, when below three > conditions are met: > > 1. Kernel image is not loaded to the default address (LOAD_PHYSICAL_ADDR) > by the loader. > 2. There isn't enough room to contain the kernel, starting from the > default load address (eg. something else occupied part the region). > 3. In the memmap provided by EFI firmware, there is a memory region > starts below LOAD_PHYSICAL_ADDR, and suitable for containing the > kernel. > > Efi stub will perform a kernel relocation when condition 1 is met. But > due to condition 2, efi stub can't relocate kernel to the preferred > address, so it fallback to query and alloc from EFI firmware for lowest > usable memory region. > > It's incorrect to use the lowest memory address. In later stage, kernel > will assume LOAD_PHYSICAL_ADDR as the minimal acceptable relocate address, > but efi stub will end up relocating kernel below it. > > Then before the kernel decompressing. Kernel will do another relocation > to address not lower than LOAD_PHYSICAL_ADDR, this time the relocate will > over write the blockage at the default load address, which efi stub tried > to avoid, and lead to unexpected behavior. Beside, the memory region it > writes to is not allocated from EFI firmware, which is also wrong. > > To fix it, just don't let efi stub relocate the kernel to any address > lower than lowest acceptable address. > > Signed-off-by: Kairui Song <kasong@xxxxxxxxxx> > Hello Kairui, This patch looks correct to me, but it needs an ack from the x86 maintainers, since the rules around LOAD_PHYSICAL_ADDR are specific to the x86 architecture. > --- > > Update from V1: > - Redo the commit message. > > arch/x86/boot/compressed/eboot.c | 8 +++++--- > drivers/firmware/efi/libstub/arm32-stub.c | 2 +- > drivers/firmware/efi/libstub/arm64-stub.c | 2 +- > drivers/firmware/efi/libstub/efi-stub-helper.c | 12 ++++++++---- > include/linux/efi.h | 5 +++-- > 5 files changed, 18 insertions(+), 11 deletions(-) > > diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c > index 936bdb924ec2..8207e8aa297e 100644 > --- a/arch/x86/boot/compressed/eboot.c > +++ b/arch/x86/boot/compressed/eboot.c > @@ -13,6 +13,7 @@ > #include <asm/e820/types.h> > #include <asm/setup.h> > #include <asm/desc.h> > +#include <asm/boot.h> > > #include "../string.h" > #include "eboot.h" > @@ -432,7 +433,7 @@ struct boot_params *make_boot_params(struct efi_config *c) > } > > status = efi_low_alloc(sys_table, 0x4000, 1, > - (unsigned long *)&boot_params); > + (unsigned long *)&boot_params, 0); > if (status != EFI_SUCCESS) { > efi_printk(sys_table, "Failed to allocate lowmem for boot params\n"); > return NULL; > @@ -817,7 +818,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) > > gdt->size = 0x800; > status = efi_low_alloc(sys_table, gdt->size, 8, > - (unsigned long *)&gdt->address); > + (unsigned long *)&gdt->address, 0); > if (status != EFI_SUCCESS) { > efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n"); > goto fail; > @@ -842,7 +843,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) > status = efi_relocate_kernel(sys_table, &bzimage_addr, > hdr->init_size, hdr->init_size, > hdr->pref_address, > - hdr->kernel_alignment); > + hdr->kernel_alignment, > + LOAD_PHYSICAL_ADDR); > if (status != EFI_SUCCESS) { > efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); > goto fail; > diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c > index e8f7aefb6813..bf6f954d6afe 100644 > --- a/drivers/firmware/efi/libstub/arm32-stub.c > +++ b/drivers/firmware/efi/libstub/arm32-stub.c > @@ -220,7 +220,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, > *image_size = image->image_size; > status = efi_relocate_kernel(sys_table, image_addr, *image_size, > *image_size, > - dram_base + MAX_UNCOMP_KERNEL_SIZE, 0); > + dram_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0); > if (status != EFI_SUCCESS) { > pr_efi_err(sys_table, "Failed to relocate kernel.\n"); > efi_free(sys_table, *reserve_size, *reserve_addr); > diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c > index 1550d244e996..3d2e517e10f4 100644 > --- a/drivers/firmware/efi/libstub/arm64-stub.c > +++ b/drivers/firmware/efi/libstub/arm64-stub.c > @@ -140,7 +140,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, > if (status != EFI_SUCCESS) { > *reserve_size = kernel_memsize + TEXT_OFFSET; > status = efi_low_alloc(sys_table_arg, *reserve_size, > - MIN_KIMG_ALIGN, reserve_addr); > + MIN_KIMG_ALIGN, reserve_addr, 0); > > if (status != EFI_SUCCESS) { > pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); > diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c > index 3caae7f2cf56..00b00a2562aa 100644 > --- a/drivers/firmware/efi/libstub/efi-stub-helper.c > +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c > @@ -260,11 +260,11 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, > } > > /* > - * Allocate at the lowest possible address. > + * Allocate at the lowest possible address that is not below 'min'. > */ > efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, > unsigned long size, unsigned long align, > - unsigned long *addr) > + unsigned long *addr, unsigned long min) > { > unsigned long map_size, desc_size, buff_size; > efi_memory_desc_t *map; > @@ -311,6 +311,9 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, > start = desc->phys_addr; > end = start + desc->num_pages * EFI_PAGE_SIZE; > > + if (start < min) > + start = min; > + > /* > * Don't allocate at 0x0. It will confuse code that > * checks pointers against NULL. Skip the first 8 > @@ -698,7 +701,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, > unsigned long image_size, > unsigned long alloc_size, > unsigned long preferred_addr, > - unsigned long alignment) > + unsigned long alignment, > + unsigned long min_addr) > { > unsigned long cur_image_addr; > unsigned long new_addr = 0; > @@ -732,7 +736,7 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, > */ > if (status != EFI_SUCCESS) { > status = efi_low_alloc(sys_table_arg, alloc_size, alignment, > - &new_addr); > + &new_addr, min_addr); > } > if (status != EFI_SUCCESS) { > pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n"); > diff --git a/include/linux/efi.h b/include/linux/efi.h > index f87fabea4a85..cc947c0f3e06 100644 > --- a/include/linux/efi.h > +++ b/include/linux/efi.h > @@ -1587,7 +1587,7 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, > > efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, > unsigned long size, unsigned long align, > - unsigned long *addr); > + unsigned long *addr, unsigned long min); > > efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, > unsigned long size, unsigned long align, > @@ -1598,7 +1598,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, > unsigned long image_size, > unsigned long alloc_size, > unsigned long preferred_addr, > - unsigned long alignment); > + unsigned long alignment, > + unsigned long min_addr); > > efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, > efi_loaded_image_t *image, > -- > 2.21.0 >