The kernel restricts the command line size, and sometimes memmap=exactmap has too many memory ranges to pass on the command line. A better approach for passing the memory ranges that the crash kernel boots into is to fill them into the E820 map. boot_params has only 128 slots for the E820 map; when the number of memory ranges exceeds 128, use setup_data to pass the rest as an extended E820 memory map. kexec boot can also benefit from setup_data in case the E820 memory map exceeds 128 entries. This new approach now becomes the default instead of memmap=exactmap. saved_max_pfn users can specify --pass-memmap-cmdline to use the exactmap approach. Signed-off-by: WANG Chao <chaowang at redhat.com> Tested-by: Linn Crosetto <linn at hp.com> Reviewed-by: Linn Crosetto <linn at hp.com> --- kexec/arch/i386/crashdump-x86.c | 25 +++--- kexec/arch/i386/crashdump-x86.h | 1 + kexec/arch/i386/x86-linux-setup.c | 171 +++++++++++++++++++++++++------------- 3 files changed, 130 insertions(+), 67 deletions(-) diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c index c55a6b1..cb19e7d 100644 --- a/kexec/arch/i386/crashdump-x86.c +++ b/kexec/arch/i386/crashdump-x86.c @@ -182,6 +182,8 @@ static int exclude_region(int *nr_ranges, uint64_t start, uint64_t end); struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; int crash_memory_ranges; +int pass_memmap_cmdline; + /* Memory region reserved for storing panic kernel and other data. 
*/ #define CRASH_RESERVED_MEM_NR 8 static struct memory_range crash_reserved_mem[CRASH_RESERVED_MEM_NR]; @@ -947,20 +949,23 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); if (delete_memmap(crash_memory_range, &crash_memory_ranges, elfcorehdr, memsz) < 0) return -1; - cmdline_add_memmap(mod_cmdline, crash_memory_range); if (!bzImage_support_efi_boot) cmdline_add_efi(mod_cmdline); cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); - /* Inform second kernel about the presence of ACPI tables. */ - for (i = 0; i < CRASH_MAX_MEMORY_RANGES; i++) { - unsigned long start, end; - if ( !( mem_range[i].type == RANGE_ACPI - || mem_range[i].type == RANGE_ACPI_NVS) ) - continue; - start = mem_range[i].start; - end = mem_range[i].end; - cmdline_add_memmap_acpi(mod_cmdline, start, end); + pass_memmap_cmdline = arch_options.pass_memmap_cmdline; + if (pass_memmap_cmdline) { + cmdline_add_memmap(mod_cmdline, crash_memory_range); + /* Inform second kernel about the presence of ACPI tables. 
*/ + for (i = 0; i < CRASH_MAX_MEMORY_RANGES; i++) { + unsigned long start, end; + if ( !( mem_range[i].type == RANGE_ACPI + || mem_range[i].type == RANGE_ACPI_NVS) ) + continue; + start = mem_range[i].start; + end = mem_range[i].end; + cmdline_add_memmap_acpi(mod_cmdline, start, end); + } } return 0; diff --git a/kexec/arch/i386/crashdump-x86.h b/kexec/arch/i386/crashdump-x86.h index 633ee0e..e68b626 100644 --- a/kexec/arch/i386/crashdump-x86.h +++ b/kexec/arch/i386/crashdump-x86.h @@ -30,5 +30,6 @@ int load_crashdump_segments(struct kexec_info *info, char *mod_cmdline, extern struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES]; extern int crash_memory_ranges; +extern int pass_memmap_cmdline; #endif /* CRASHDUMP_X86_H */ diff --git a/kexec/arch/i386/x86-linux-setup.c b/kexec/arch/i386/x86-linux-setup.c index 5884f4d..e8865e1 100644 --- a/kexec/arch/i386/x86-linux-setup.c +++ b/kexec/arch/i386/x86-linux-setup.c @@ -35,8 +35,7 @@ #include "kexec-x86.h" #include "x86-linux-setup.h" #include "../../kexec/kexec-syscall.h" - -#define SETUP_EFI 4 +#include "crashdump-x86.h" void init_linux_parameters(struct x86_linux_param_header *real_mode) { @@ -502,6 +501,11 @@ struct efi_setup_data { struct setup_data { uint64_t next; uint32_t type; +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 uint32_t len; uint8_t data[0]; } __attribute__((packed)); @@ -602,6 +606,17 @@ struct efi_info { uint32_t efi_memmap_hi; }; +static void add_setup_data(struct kexec_info *info, + struct x86_linux_param_header *real_mode, + struct setup_data *sd) +{ + int sdsize = sizeof(struct setup_data) + sd->len; + + sd->next = real_mode->setup_data; + real_mode->setup_data = add_buffer(info, sd, sdsize, sdsize, getpagesize(), + 0x100000, ULONG_MAX, INT_MAX); +} + /* * setup_efi_data will collect below data and pass them to 2nd kernel. 
* 1) SMBIOS, fw_vendor, runtime, config_table, they are passed via x86 @@ -611,11 +626,11 @@ struct efi_info { static int setup_efi_data(struct kexec_info *info, struct x86_linux_param_header *real_mode) { - int64_t setup_data_paddr, memmap_paddr; + int64_t memmap_paddr; struct setup_data *sd; struct efi_setup_data *esd; struct efi_mem_descriptor *maps; - int nr_maps, size, sdsize, ret = 0; + int nr_maps, size, ret = 0; struct efi_info *ei = (struct efi_info *)real_mode->efi_info; ret = access("/sys/firmware/efi/systab", F_OK); @@ -648,10 +663,8 @@ static int setup_efi_data(struct kexec_info *info, sd->len = sizeof(*esd); memcpy(sd->data, esd, sizeof(*esd)); free(esd); - sdsize = sd->len + sizeof(struct setup_data); - setup_data_paddr = add_buffer(info, sd, sdsize, sdsize, getpagesize(), - 0x100000, ULONG_MAX, INT_MAX); - real_mode->setup_data = setup_data_paddr; + + add_setup_data(info, real_mode, sd); size = nr_maps * sizeof(struct efi_mem_descriptor); memmap_paddr = add_buffer(info, maps, size, size, getpagesize(), @@ -669,6 +682,98 @@ out: return ret; } +static void add_e820_map_from_mr(struct x86_linux_param_header *real_mode, + struct e820entry *e820, struct memory_range *range, int nr_range) +{ + int i; + + for (i = 0; i < nr_range; i++) { + e820[i].addr = range[i].start; + e820[i].size = range[i].end - range[i].start; + switch (range[i].type) { + case RANGE_RAM: + e820[i].type = E820_RAM; + break; + case RANGE_ACPI: + e820[i].type = E820_ACPI; + break; + case RANGE_ACPI_NVS: + e820[i].type = E820_NVS; + break; + default: + case RANGE_RESERVED: + e820[i].type = E820_RESERVED; + break; + } + dbgprintf("%016lx-%016lx (%d)\n", + e820[i].addr, + e820[i].addr + e820[i].size - 1, + e820[i].type); + + if (range[i].type != RANGE_RAM) + continue; + if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { + unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); + real_mode->ext_mem_k = mem_k; + real_mode->alt_mem_k = mem_k; + if (mem_k > 0xfc00) { + 
real_mode->ext_mem_k = 0xfc00; /* 64M */ + } + if (mem_k > 0xffffffff) { + real_mode->alt_mem_k = 0xffffffff; + } + } + } +} + +static void setup_e820_ext(struct kexec_info *info, struct x86_linux_param_header *real_mode, + struct memory_range *range, int nr_range) +{ + struct setup_data *sd; + struct e820entry *e820; + int nr_range_ext; + + nr_range_ext = nr_range - E820MAX; + sd = xmalloc(sizeof(struct setup_data) + nr_range_ext * sizeof(struct e820entry)); + sd->next = 0; + sd->len = nr_range_ext * sizeof(struct e820entry); + sd->type = SETUP_E820_EXT; + + e820 = (struct e820entry *) sd->data; + dbgprintf("Extended E820 via setup_data:\n"); + add_e820_map_from_mr(real_mode, e820, range + E820MAX, nr_range_ext); + add_setup_data(info, real_mode, sd); +} + +static void setup_e820(struct kexec_info *info, struct x86_linux_param_header *real_mode) +{ + struct memory_range *range; + int nr_range, nr_range_saved; + + + if (info->kexec_flags & KEXEC_ON_CRASH && !pass_memmap_cmdline) { + range = crash_memory_range; + nr_range = crash_memory_ranges; + } else { + range = info->memory_range; + nr_range = info->memory_ranges; + } + + nr_range_saved = nr_range; + if (nr_range > E820MAX) { + nr_range = E820MAX; + } + + real_mode->e820_map_nr = nr_range; + dbgprintf("E820 memmap:\n"); + add_e820_map_from_mr(real_mode, real_mode->e820_map, range, nr_range); + + if (nr_range_saved > E820MAX) { + dbgprintf("extra E820 memmap are passed via setup_data\n"); + setup_e820_ext(info, real_mode, range, nr_range_saved); + } +} + static int get_efi_mem_desc_version(struct x86_linux_param_header *real_mode) { @@ -702,10 +807,6 @@ static void setup_efi_info(struct kexec_info *info, void setup_linux_system_parameters(struct kexec_info *info, struct x86_linux_param_header *real_mode) { - /* Fill in information the BIOS would usually provide */ - struct memory_range *range; - int i, ranges; - /* get subarch from running kernel */ setup_subarch(real_mode); if (bzImage_support_efi_boot) @@ 
-746,51 +847,7 @@ void setup_linux_system_parameters(struct kexec_info *info, /* another safe default */ real_mode->aux_device_info = 0; - range = info->memory_range; - ranges = info->memory_ranges; - if (ranges > E820MAX) { - if (!(info->kexec_flags & KEXEC_ON_CRASH)) - /* - * this e820 not used for capture kernel, see - * do_bzImage_load() - */ - fprintf(stderr, - "Too many memory ranges, truncating...\n"); - ranges = E820MAX; - } - real_mode->e820_map_nr = ranges; - for(i = 0; i < ranges; i++) { - real_mode->e820_map[i].addr = range[i].start; - real_mode->e820_map[i].size = range[i].end - range[i].start; - switch (range[i].type) { - case RANGE_RAM: - real_mode->e820_map[i].type = E820_RAM; - break; - case RANGE_ACPI: - real_mode->e820_map[i].type = E820_ACPI; - break; - case RANGE_ACPI_NVS: - real_mode->e820_map[i].type = E820_NVS; - break; - default: - case RANGE_RESERVED: - real_mode->e820_map[i].type = E820_RESERVED; - break; - } - if (range[i].type != RANGE_RAM) - continue; - if ((range[i].start <= 0x100000) && range[i].end > 0x100000) { - unsigned long long mem_k = (range[i].end >> 10) - (0x100000 >> 10); - real_mode->ext_mem_k = mem_k; - real_mode->alt_mem_k = mem_k; - if (mem_k > 0xfc00) { - real_mode->ext_mem_k = 0xfc00; /* 64M */ - } - if (mem_k > 0xffffffff) { - real_mode->alt_mem_k = 0xffffffff; - } - } - } + setup_e820(info, real_mode); /* fill the EDD information */ setup_edd_info(real_mode); -- 1.8.5.3