On Thu, 17 Oct 2019 at 11:30, Kairui Song <kasong@xxxxxxxxxx> wrote: > > Currently, kernel fails to boot on some HyperV VMs when using EFI. > And it's a potential issue on all platforms. > > It's caused by broken kernel relocation on EFI systems, when below three > conditions are met: > > 1. Kernel image is not loaded to the default address (LOAD_PHYSICAL_ADDR) > by the loader. > 2. There isn't enough room to contain the kernel, starting from the > default load address (eg. something else occupied part the region). > 3. In the memmap provided by EFI firmware, there is a memory region > starts below LOAD_PHYSICAL_ADDR, and suitable for containing the > kernel. > > EFI stub will perform a kernel relocation when condition 1 is met. But > due to condition 2, EFI stub can't relocate kernel to the preferred > address, so it fallback to ask EFI firmware to alloc lowest usable memory > region, got the low region mentioned in condition 3, and relocated > kernel there. > > It's incorrect to relocate the kernel below LOAD_PHYSICAL_ADDR. This > is the lowest acceptable kernel relocation address. > > The first thing goes wrong is in arch/x86/boot/compressed/head_64.S. > Kernel decompression will force use LOAD_PHYSICAL_ADDR as the output > address if kernel is located below it. Then the relocation before > decompression, which move kernel to the end of the decompression buffer, > will overwrite other memory region, as there is no enough memory there. > > To fix it, just don't let EFI stub relocate the kernel to any address > lower than lowest acceptable address. > > Signed-off-by: Kairui Song <kasong@xxxxxxxxxx> > Acked-by: Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxxxxxxxx> > Ingo, Boris, could you please comment on this? Apologies for not responding with review comments until now, but I was waiting for someone from team-x86 to acknowledge the issue and confirm a fix is needed. Some comments below. > --- > Update from V3: > - Update commit message. 
> > Update from V2: > - Update part of the commit message. > > Update from V1: > - Redo the commit message. > > arch/x86/boot/compressed/eboot.c | 8 +++++--- > drivers/firmware/efi/libstub/arm32-stub.c | 2 +- > drivers/firmware/efi/libstub/arm64-stub.c | 2 +- > drivers/firmware/efi/libstub/efi-stub-helper.c | 12 ++++++++---- > include/linux/efi.h | 5 +++-- > 5 files changed, 18 insertions(+), 11 deletions(-) > > diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c > index d6662fdef300..e89e84b66527 100644 > --- a/arch/x86/boot/compressed/eboot.c > +++ b/arch/x86/boot/compressed/eboot.c > @@ -13,6 +13,7 @@ > #include <asm/e820/types.h> > #include <asm/setup.h> > #include <asm/desc.h> > +#include <asm/boot.h> > > #include "../string.h" > #include "eboot.h" > @@ -413,7 +414,7 @@ struct boot_params *make_boot_params(struct efi_config *c) > } > > status = efi_low_alloc(sys_table, 0x4000, 1, > - (unsigned long *)&boot_params); > + (unsigned long *)&boot_params, 0); Instead of changing the calls to efi_low_alloc() everywhere, could you please only update the implementation to take a 'min' argument, and rename it to something like 'efi_low_alloc_above()'. Then, the original extern declaration of efi_low_alloc() can be converted into a static inline that calls efi_low_alloc_above. That also allows us to pull the 'alloc at 0x0' logic out of the loop, e.g., efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); static inline efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr) { /* * Don't allocate at 0x0. It will confuse code that * checks pointers against NULL. Skip the first 8 
*/ return efi_low_alloc_above(sys_table_arg, size, align, addr, 8); } (and drop the same logic from the function implementation) > if (status != EFI_SUCCESS) { > efi_printk(sys_table, "Failed to allocate lowmem for boot params\n"); > return NULL; > @@ -798,7 +799,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) > > gdt->size = 0x800; > status = efi_low_alloc(sys_table, gdt->size, 8, > - (unsigned long *)&gdt->address); > + (unsigned long *)&gdt->address, 0); > if (status != EFI_SUCCESS) { > efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n"); > goto fail; > @@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) > status = efi_relocate_kernel(sys_table, &bzimage_addr, > hdr->init_size, hdr->init_size, > hdr->pref_address, > - hdr->kernel_alignment); > + hdr->kernel_alignment, > + LOAD_PHYSICAL_ADDR); > if (status != EFI_SUCCESS) { > efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); > goto fail; > diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c > index e8f7aefb6813..bf6f954d6afe 100644 > --- a/drivers/firmware/efi/libstub/arm32-stub.c > +++ b/drivers/firmware/efi/libstub/arm32-stub.c > @@ -220,7 +220,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, > *image_size = image->image_size; > status = efi_relocate_kernel(sys_table, image_addr, *image_size, > *image_size, > - dram_base + MAX_UNCOMP_KERNEL_SIZE, 0); > + dram_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0); > if (status != EFI_SUCCESS) { > pr_efi_err(sys_table, "Failed to relocate kernel.\n"); > efi_free(sys_table, *reserve_size, *reserve_addr); > diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c > index 1550d244e996..3d2e517e10f4 100644 > --- a/drivers/firmware/efi/libstub/arm64-stub.c > +++ b/drivers/firmware/efi/libstub/arm64-stub.c > @@ -140,7 +140,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, > if (status 
!= EFI_SUCCESS) { > *reserve_size = kernel_memsize + TEXT_OFFSET; > status = efi_low_alloc(sys_table_arg, *reserve_size, > - MIN_KIMG_ALIGN, reserve_addr); > + MIN_KIMG_ALIGN, reserve_addr, 0); > > if (status != EFI_SUCCESS) { > pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); > diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c > index 3caae7f2cf56..00b00a2562aa 100644 > --- a/drivers/firmware/efi/libstub/efi-stub-helper.c > +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c > @@ -260,11 +260,11 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, > } > > /* > - * Allocate at the lowest possible address. > + * Allocate at the lowest possible address that is not below 'min'. > */ > efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, > unsigned long size, unsigned long align, > - unsigned long *addr) > + unsigned long *addr, unsigned long min) > { > unsigned long map_size, desc_size, buff_size; > efi_memory_desc_t *map; > @@ -311,6 +311,9 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, > start = desc->phys_addr; > end = start + desc->num_pages * EFI_PAGE_SIZE; > > + if (start < min) > + start = min; > + > /* > * Don't allocate at 0x0. It will confuse code that > * checks pointers against NULL. 
Skip the first 8 > @@ -698,7 +701,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, > unsigned long image_size, > unsigned long alloc_size, > unsigned long preferred_addr, > - unsigned long alignment) > + unsigned long alignment, > + unsigned long min_addr) > { > unsigned long cur_image_addr; > unsigned long new_addr = 0; > @@ -732,7 +736,7 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, > */ > if (status != EFI_SUCCESS) { > status = efi_low_alloc(sys_table_arg, alloc_size, alignment, > - &new_addr); > + &new_addr, min_addr); > } > if (status != EFI_SUCCESS) { > pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n"); > diff --git a/include/linux/efi.h b/include/linux/efi.h > index bd3837022307..a5144cc44e54 100644 > --- a/include/linux/efi.h > +++ b/include/linux/efi.h > @@ -1581,7 +1581,7 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, > > efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, > unsigned long size, unsigned long align, > - unsigned long *addr); > + unsigned long *addr, unsigned long min); > > efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, > unsigned long size, unsigned long align, > @@ -1592,7 +1592,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, > unsigned long image_size, > unsigned long alloc_size, > unsigned long preferred_addr, > - unsigned long alignment); > + unsigned long alignment, > + unsigned long min_addr); > > efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, > efi_loaded_image_t *image, > -- > 2.21.0 >