On Friday, April 12, 2013 08:24:12 AM Eric W. Biederman wrote: > Thomas Renninger <trenn at suse.de> writes: > > Currently ranges are passed via kernel boot parameters: > > memmap=exactmap memmap=X#Y memmap= > > > > Pass them via e820 table directly instead. > > Reading through this code I am not seeing us mark areas of memory that > we may not use as reserved. Am I missing something? They are added. acpi and reserved (only like in the original kernel) come from the crash_memory_range[] array (like in cmdline_add_memmap_acpi and cmdline_add_memmap_reserved), and the memory that is free to use for the crash kernel comes from the array (memmap_p): cmdline_add_memmap_acpi I see that the merging function is not that nice to read, but I couldn't come up with something better for now. In fact it should work exactly the same way as before with memmap= passing. There is something odd with not passing the highest e820 entry. It seems to be needed, but it was not passed via memmap= either(?), so my version should also work, and a test did: The dumpfile is saved to /root/abuild/dumps/2013-04-15-12:59/vmcore file size: 3251795465 free total Mem: 32633312 I will answer in more detail in my reply to Yinghai's mail. Thomas > Those areas need to be marked reserved or else the pci resource > allocator in the kernel will think it is ok to put pci memory there. > > Eric > > > CC: Simon Horman <horms at verge.net.au> > > CC: kexec at lists.infradead.org > > CC: H. Peter Anvin <hpa at zytor.com> > > CC: Eric W. 
Biederman <ebiederm at xmission.com> > > CC: vgoyal at redhat.com > > CC: yinghai at kernel.org > > CC: cpw at sgi.com > > > > Signed-off-by: Thomas Renninger <trenn at suse.de> > > Signed-off-by: Thomas Renninger <Thomas Renninger" trenn at suse.de> > > --- > > > > kexec/arch/i386/crashdump-x86.c | 221 > > ++++++++++++++++++------------------- kexec/arch/i386/x86-linux-setup.c > > | 11 ++- > > 2 files changed, 116 insertions(+), 116 deletions(-) > > > > diff --git a/kexec/arch/i386/crashdump-x86.c > > b/kexec/arch/i386/crashdump-x86.c index f7821bc..8009efe 100644 > > --- a/kexec/arch/i386/crashdump-x86.c > > +++ b/kexec/arch/i386/crashdump-x86.c > > @@ -659,70 +659,6 @@ static void ultoa(unsigned long i, char *str) > > > > } > > > > } > > > > -static void cmdline_add_memmap_internal(char *cmdline, unsigned long > > startk, - unsigned long endk, int type) > > -{ > > - int cmdlen, len; > > - char str_mmap[256], str_tmp[20]; > > - > > - strcpy (str_mmap, " memmap="); > > - ultoa((endk-startk), str_tmp); > > - strcat (str_mmap, str_tmp); > > - > > - if (type == RANGE_RAM) > > - strcat (str_mmap, "K@"); > > - else if (type == RANGE_RESERVED) > > - strcat (str_mmap, "K$"); > > - else if (type == RANGE_ACPI || type == RANGE_ACPI_NVS) > > - strcat (str_mmap, "K#"); > > - > > - ultoa(startk, str_tmp); > > - strcat (str_mmap, str_tmp); > > - strcat (str_mmap, "K"); > > - len = strlen(str_mmap); > > - cmdlen = strlen(cmdline) + len; > > - if (cmdlen > (COMMAND_LINE_SIZE - 1)) > > - die("Command line overflow\n"); > > - strcat(cmdline, str_mmap); > > -} > > - > > -/* Adds the appropriate memmap= options to command line, indicating the > > - * memory regions the new kernel can use to boot into. 
*/ > > -static int cmdline_add_memmap(char *cmdline, struct memory_range > > *memmap_p) -{ > > - int i, cmdlen, len; > > - unsigned long min_sizek = 100; > > - char str_mmap[256]; > > - > > - /* Exact map */ > > - strcpy(str_mmap, " memmap=exactmap"); > > - len = strlen(str_mmap); > > - cmdlen = strlen(cmdline) + len; > > - if (cmdlen > (COMMAND_LINE_SIZE - 1)) > > - die("Command line overflow\n"); > > - strcat(cmdline, str_mmap); > > - > > - for (i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { > > - unsigned long startk, endk; > > - startk = (memmap_p[i].start/1024); > > - endk = ((memmap_p[i].end + 1)/1024); > > - if (!startk && !endk) > > - /* All regions traversed. */ > > - break; > > - > > - /* A region is not worth adding if region size < 100K. It eats > > - * up precious command line length. */ > > - if ((endk - startk) < min_sizek) > > - continue; > > - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_RAM); > > - } > > - > > - dbgprintf("Command line after adding memmap\n"); > > - dbgprintf("%s\n", cmdline); > > - > > - return 0; > > -} > > - > > > > /* Adds the elfcorehdr= command line parameter to command line. 
*/ > > static int cmdline_add_elfcorehdr(char *cmdline, unsigned long addr) > > { > > > > @@ -803,26 +739,6 @@ static enum coretype get_core_type(struct > > crash_elf_info *elf_info,> > > } > > > > } > > > > -/* Appends memmap=X#Y commandline for ACPI to command line*/ > > -static int cmdline_add_memmap_acpi(char *cmdline, unsigned long start, > > - unsigned long end) > > -{ > > - int align = 1024; > > - unsigned long startk, endk; > > - > > - if (!(end - start)) > > - return 0; > > - > > - startk = start/1024; > > - endk = (end + align - 1)/1024; > > - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_ACPI); > > - > > - dbgprintf("Command line after adding acpi memmap\n"); > > - dbgprintf("%s\n", cmdline); > > - > > - return 0; > > -} > > - > > > > /* Appends 'acpi_rsdp=' commandline for efi boot crash dump */ > > static void cmdline_add_efi(char *cmdline) > > { > > > > @@ -881,24 +797,101 @@ static void get_backup_area(struct kexec_info > > *info, > > > > info->backup_src_size = BACKUP_SRC_END - BACKUP_SRC_START + 1; > > > > } > > > > -/* Appends memmap=X$Y commandline for reserved memory to command line*/ > > -static int cmdline_add_memmap_reserved(char *cmdline, unsigned long > > start, > > - unsigned long end) > > +/* > > + * This function takes reserved (all kind of) memory from global > > + * crash_memory_range[] memory ranges and takes memory the kdump/crash > > + * kernel is allowed to use from the passed usable_mem memory ranges. > > + * The passed usable_mem ranges are zero (!start && !end) terminated. 
> > + * > > + * The final memory map is again written into crash_memory_range[] > > + * and intended to get passed as e820 table to the crash kernel > > + */ > > +static int create_final_crash_map(struct memory_range *usable_mem) > > > > { > > > > - int align = 1024; > > - unsigned long startk, endk; > > + int i, m, c, tmp_map1_ranges, tmp_map2_ranges; > > + unsigned long min_sizek = 100; > > + /* crash_memory_map with usable memory ranges cut out */ > > + struct memory_range tmp_map1[MAX_MEMORY_RANGES]; > > + /* merge_map, but small ranges cut out */ > > + struct memory_range tmp_map2[MAX_MEMORY_RANGES]; > > > > - if (!(end - start)) > > - return 0; > > + /* > > + * Ignore usable memory ranges for kdump kernel smaller > > + * than 100k to avoid too much ranges passed > > + * Save the new ranges (exluding lower than 100k ranges) in tmp_map > > + * and store the number of elements in tmp_map_ranges > > + */ > > + for (m = 0, i = 0; i < CRASH_MAX_MEMMAP_NR; i++) { > > + unsigned long startk, endk; > > + startk = (usable_mem[i].start/1024); > > + endk = ((usable_mem[i].end + 1)/1024); > > + if (!startk && !endk) > > + /* All regions traversed. */ > > + break; > > + > > + /* A region is not worth adding if region size < 100K. It eats > > + * up precious command line length. 
*/ > > + if ((endk - startk) < min_sizek) { > > + dbgprintf("Removing: %luk - %luk\n", startk, endk); > > + continue; > > + } else { > > + tmp_map1[m].start = usable_mem[i].start; > > + tmp_map1[m].end = usable_mem[i].end; > > + tmp_map1[m].type = usable_mem[i].type; > > + m++; > > + } > > + } > > + /* No need to check for !start && !end anymore */ > > + tmp_map1_ranges = m; > > > > - startk = start/1024; > > - endk = (end + align - 1)/1024; > > - cmdline_add_memmap_internal(cmdline, startk, endk, RANGE_RESERVED); > > + for(i = 0; i < tmp_map1_ranges; ++i) > > + dbgprintf("%016Lx-%016Lx (%d)\n", tmp_map1[i].start, > > + tmp_map1[i].end, tmp_map1[i].type); > > + > > + /* > > + * Cut out RANGE_RAM regions from crash_memory_ranges and store > > + * them in tmp_map2_ranges > > + */ > > + for (c = 0, i = 0; i < crash_ranges; i++) { > > + if (crash_memory_range[i].type == RANGE_RAM) > > + continue; > > + tmp_map2[c].start = crash_memory_range[i].start; > > + tmp_map2[c].end = crash_memory_range[i].end; > > + tmp_map2[c].type = crash_memory_range[i].type; > > + c++; > > + } > > + tmp_map2_ranges = c; > > + > > + /* > > + * TBD: Check that no ranges overlap? > > + * Can this happen at all? 
> > + */ > > + for (c = 0, m = 0, i = 0; i < MAX_MEMORY_RANGES; i++) { > > + if (m < tmp_map1_ranges && > > + (c >= tmp_map2_ranges || > > + tmp_map2[c].start > tmp_map1[m].start)) { > > + crash_memory_range[i].start = tmp_map1[m].start; > > + crash_memory_range[i].end = tmp_map1[m].end; > > + crash_memory_range[i].type = RANGE_RAM; > > + m++; > > + continue; > > + } else if (c < tmp_map2_ranges) { > > + crash_memory_range[i] = tmp_map2[c]; > > + c++; > > + continue; > > + } else > > + break; > > + } > > + crash_ranges = i; > > + > > + /* > > + * End address has to be exlusive for e820 map > > + * x - 00010000 > > + * 00010000 - y > > + */ > > + for(i = 0; i < crash_ranges; ++i) > > + crash_memory_range[i].end++; > > > > -#ifdef DEBUG > > - printf("Command line after adding reserved memmap\n"); > > - printf("%s\n", cmdline); > > -#endif > > > > return 0; > > > > } > > > > @@ -944,6 +937,12 @@ int load_crashdump_segments(struct kexec_info *info, > > char* mod_cmdline,> > > return -1; > > > > } > > > > + /* > > + * From now on the memory regions are stored in crash_memory_range[] > > + * Currently the end address is inclusive at this point: > > + * x - 0000ffff > > + * 00010000 - y > > + */ > > > > if (xen_present()) { > > > > if (get_crash_memory_ranges_xen(&mem_range, &crash_ranges, > > > > elf_info.lowmem_limit) < 0) > > > > @@ -971,7 +970,7 @@ int load_crashdump_segments(struct kexec_info *info, > > char* mod_cmdline,> > > get_backup_area(info, mem_range, crash_ranges); > > > > - dbgprintf("CRASH MEMORY RANGES\n"); > > + dbgprintf("TEMPORARY CRASH MEMORY RANGES\n"); > > > > for(i = 0; i < crash_ranges; ++i) > > > > dbgprintf("%016Lx-%016Lx (%d)\n", mem_range[i].start, > > > > @@ -1063,24 +1062,18 @@ int load_crashdump_segments(struct kexec_info > > *info, char* mod_cmdline,> > > dbgprintf("Created elf header segment at 0x%lx\n", elfcorehdr); > > if (delete_memmap(memmap_p, elfcorehdr, memsz) < 0) > > > > return -1; > > > > - cmdline_add_memmap(mod_cmdline, 
memmap_p); > > > > cmdline_add_efi(mod_cmdline); > > cmdline_add_elfcorehdr(mod_cmdline, elfcorehdr); > > > > - /* Inform second kernel about the presence of ACPI tables. */ > > - for (i = 0; i < MAX_MEMORY_RANGES; i++) { > > - unsigned long start, end; > > - if ( !( mem_range[i].type == RANGE_ACPI > > - || mem_range[i].type == RANGE_ACPI_NVS > > - || mem_range[i].type == RANGE_RESERVED) ) > > - continue; > > - start = mem_range[i].start; > > - end = mem_range[i].end; > > - if (mem_range[i].type == RANGE_RESERVED) > > - cmdline_add_memmap_reserved(mod_cmdline, start, end); > > - else > > - cmdline_add_memmap_acpi(mod_cmdline, start, end); > > - } > > + /* > > + * Redo crash_memory_range so that it can get passed as e820 info > > + */ > > + create_final_crash_map(memmap_p); > > + > > + dbgprintf("FINAL CRASH MEMORY RANGES\n"); > > + for(i = 0; i < crash_ranges; ++i) > > + dbgprintf("%016Lx-%016Lx (%d)\n", mem_range[i].start, > > + mem_range[i].end, mem_range[i].type); > > > > return 0; > > > > } > > > > diff --git a/kexec/arch/i386/x86-linux-setup.c > > b/kexec/arch/i386/x86-linux-setup.c index c538897..82b4bb9 100644 > > --- a/kexec/arch/i386/x86-linux-setup.c > > +++ b/kexec/arch/i386/x86-linux-setup.c > > @@ -505,8 +505,15 @@ void setup_linux_system_parameters(struct kexec_info > > *info,> > > /* another safe default */ > > real_mode->aux_device_info = 0; > > > > - range = info->memory_range; > > - ranges = info->memory_ranges; > > + if (info->kexec_flags & KEXEC_ON_CRASH || > > + info->kexec_flags & KEXEC_PRESERVE_CONTEXT) { > > + range = crash_memory_range; > > + ranges = crash_ranges; > > + } else { > > + range = info->memory_range; > > + ranges = info->memory_ranges; > > + } > > + > > > > if (ranges > E820MAX) { > > > > if (!(info->kexec_flags & KEXEC_ON_CRASH)) > > > > /*