On Thu, Aug 04, 2022 at 02:44:11AM +0200, Jason A. Donenfeld wrote: > The boot parameter header refers to setup_data at an absolute address, > and each setup_data refers to the next setup_data at an absolute address > too. Currently QEMU simply puts the setup_datas right after the kernel > image, and since the kernel image is loaded at prot_addr -- a fixed > address knowable to QEMU a priori -- the setup_data absolute address > winds up being just `prot_addr + a_fixed_offset_into_kernel_image`. > > This mostly works fine, so long as the kernel image really is loaded at > prot_addr. However, OVMF doesn't load the kernel at prot_addr, and > generally EFI doesn't give a good way of predicting where it's going to > load the kernel. So when it loads it at some address != prot_addr, the > absolute addresses in setup_data now point somewhere bogus, causing > crashes when the EFI stub tries to follow the next link. > > Fix this by placing setup_data at some fixed place in memory, relative > to real_addr, not as part of the kernel image, and then pointing the > setup_data absolute address to that fixed place in memory. This way, > even if OVMF or other chains relocate the kernel image, the boot > parameter still points to the correct absolute address. > > Fixes: 3cbeb52467 ("hw/i386: add device tree support") > Reported-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Richard Henderson <richard.henderson@xxxxxxxxxx> > Cc: Peter Maydell <peter.maydell@xxxxxxxxxx> > Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > Cc: Daniel P. Berrangé <berrange@xxxxxxxxxx> > Cc: Gerd Hoffmann <kraxel@xxxxxxxxxx> > Cc: Ard Biesheuvel <ardb@xxxxxxxxxx> > Cc: linux-efi@xxxxxxxxxxxxxxx > Signed-off-by: Jason A. Donenfeld <Jason@xxxxxxxxx> I haven't read the patch yet. Adding Laszlo. 
> --- > hw/i386/x86.c | 38 ++++++++++++++++++++------------------ > 1 file changed, 20 insertions(+), 18 deletions(-) > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > index 050eedc0c8..8b853abf38 100644 > --- a/hw/i386/x86.c > +++ b/hw/i386/x86.c > @@ -760,36 +760,36 @@ static bool load_elfboot(const char *kernel_filename, > fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); > fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); > fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); > > return true; > } > > void x86_load_linux(X86MachineState *x86ms, > FWCfgState *fw_cfg, > int acpi_data_size, > bool pvh_enabled, > bool legacy_no_rng_seed) > { > bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; > uint16_t protocol; > int setup_size, kernel_size, cmdline_size; > - int dtb_size, setup_data_offset; > + int dtb_size, setup_data_item_len, setup_data_total_len = 0; > uint32_t initrd_max; > - uint8_t header[8192], *setup, *kernel; > - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0; > + uint8_t header[8192], *setup, *kernel, *setup_datas = NULL; > + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0, setup_data_base; > FILE *f; > char *vmode; > MachineState *machine = MACHINE(x86ms); > struct setup_data *setup_data; > const char *kernel_filename = machine->kernel_filename; > const char *initrd_filename = machine->initrd_filename; > const char *dtb_filename = machine->dtb; > const char *kernel_cmdline = machine->kernel_cmdline; > SevKernelLoaderContext sev_load_ctx = {}; > enum { RNG_SEED_LENGTH = 32 }; > > /* Align to 16 bytes as a paranoia measure */ > cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; > > /* load the kernel header */ > f = fopen(kernel_filename, "rb"); > @@ -886,32 +886,33 @@ void x86_load_linux(X86MachineState *x86ms, > if (protocol < 0x200 || !(header[0x211] & 0x01)) { > /* Low kernel */ > real_addr = 0x90000; > cmdline_addr = 0x9a000 - 
cmdline_size; > prot_addr = 0x10000; > } else if (protocol < 0x202) { > /* High but ancient kernel */ > real_addr = 0x90000; > cmdline_addr = 0x9a000 - cmdline_size; > prot_addr = 0x100000; > } else { > /* High and recent kernel */ > real_addr = 0x10000; > cmdline_addr = 0x20000; > prot_addr = 0x100000; > } > + setup_data_base = real_addr + 0x8000; > > /* highest address for loading the initrd */ > if (protocol >= 0x20c && > lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { > /* > * Linux has supported initrd up to 4 GB for a very long time (2007, > * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), > * though it only sets initrd_max to 2 GB to "work around bootloader > * bugs". Luckily, QEMU firmware(which does something like bootloader) > * has supported this. > * > * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can > * be loaded into any address. > * > * In addition, initrd_max is uint32_t simply because QEMU doesn't > * support the 64-bit boot protocol (specifically the ext_ramdisk_image > @@ -1049,60 +1050,61 @@ void x86_load_linux(X86MachineState *x86ms, > fclose(f); > > /* append dtb to kernel */ > if (dtb_filename) { > if (protocol < 0x209) { > fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); > exit(1); > } > > dtb_size = get_image_size(dtb_filename); > if (dtb_size <= 0) { > fprintf(stderr, "qemu: error reading dtb %s: %s\n", > dtb_filename, strerror(errno)); > exit(1); > } > > - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; > - kernel = g_realloc(kernel, kernel_size); > - > - > - setup_data = (struct setup_data *)(kernel + setup_data_offset); > + setup_data_item_len = sizeof(struct setup_data) + dtb_size; > + setup_datas = g_realloc(setup_datas, setup_data_total_len + setup_data_item_len); > + setup_data = (struct setup_data *)(setup_datas + setup_data_total_len); > setup_data->next = cpu_to_le64(first_setup_data); > - 
first_setup_data = prot_addr + setup_data_offset; > + first_setup_data = setup_data_base + setup_data_total_len; > + setup_data_total_len += setup_data_item_len; > setup_data->type = cpu_to_le32(SETUP_DTB); > setup_data->len = cpu_to_le32(dtb_size); > - > load_image_size(dtb_filename, setup_data->data, dtb_size); > } > > if (!legacy_no_rng_seed) { > - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > - kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH; > - kernel = g_realloc(kernel, kernel_size); > - setup_data = (struct setup_data *)(kernel + setup_data_offset); > + setup_data_item_len = sizeof(struct setup_data) + RNG_SEED_LENGTH; > + setup_datas = g_realloc(setup_datas, setup_data_total_len + setup_data_item_len); > + setup_data = (struct setup_data *)(setup_datas + setup_data_total_len); > setup_data->next = cpu_to_le64(first_setup_data); > - first_setup_data = prot_addr + setup_data_offset; > + first_setup_data = setup_data_base + setup_data_total_len; > + setup_data_total_len += setup_data_item_len; > setup_data->type = cpu_to_le32(SETUP_RNG_SEED); > setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); > qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); > } > > - /* Offset 0x250 is a pointer to the first setup_data link. */ > - stq_p(header + 0x250, first_setup_data); > + if (first_setup_data) { > + /* Offset 0x250 is a pointer to the first setup_data link. */ > + stq_p(header + 0x250, first_setup_data); > + rom_add_blob("setup_data", setup_datas, setup_data_total_len, setup_data_total_len, > + setup_data_base, NULL, NULL, NULL, NULL, false); > + } > > /* > * If we're starting an encrypted VM, it will be OVMF based, which uses the > * efi stub for booting and doesn't require any values to be placed in the > * kernel header. We therefore don't update the header so the hash of the > * kernel on the other side of the fw_cfg interface matches the hash of the > * file the user passed in. 
> */ > if (!sev_enabled()) { > memcpy(setup, header, MIN(sizeof(header), setup_size)); > } > > fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); > fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); > fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); > sev_load_ctx.kernel_data = (char *)kernel; > -- > 2.35.1