Hello Indoh-san, Sorry for the very late response; please see below. >This patch fixes a problem where makedumpfile cannot handle a dumpfile >which is captured by sadump in a KASLR-enabled kernel. > >When the KASLR feature is enabled, the kernel is placed in memory randomly >and therefore makedumpfile cannot handle a dumpfile captured by sadump >because addresses of kernel symbols in System.map or vmlinux are >different from the actual addresses. > >To solve this problem, we need to calculate the kaslr offset (the difference >between the original symbol address and the actual address) and phys_base, and >adjust the symbol table of makedumpfile. In the case of a kdump dumpfile, >this information is included in the header, but a sadump dumpfile does >not have such information. > >This patch calculates the kaslr offset and phys_base to solve this problem. >Please see the comment in calc_kaslr_offset() for the detailed idea. >The basic idea is getting registers (IDTR and CR3) from the dump header, and >calculating kaslr_offset/phys_base using them. 
> >Signed-off-by: Takao Indoh <indou.takao at jp.fujitsu.com> >--- > makedumpfile.c | 11 ++ > makedumpfile.h | 6 +- > sadump_info.c | 415 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 3 files changed, 429 insertions(+), 3 deletions(-) > >diff --git a/makedumpfile.c b/makedumpfile.c >index 5f2ca7d..4fa1b3a 100644 >--- a/makedumpfile.c >+++ b/makedumpfile.c >@@ -1554,6 +1554,10 @@ get_symbol_info(void) > SYMBOL_INIT(demote_segment_4k, "demote_segment_4k"); > SYMBOL_INIT(cur_cpu_spec, "cur_cpu_spec"); > >+ SYMBOL_INIT(divide_error, "divide_error"); >+ SYMBOL_INIT(idt_table, "idt_table"); >+ SYMBOL_INIT(saved_command_line, "saved_command_line"); >+ > return TRUE; > } > >@@ -2249,6 +2253,13 @@ write_vmcoreinfo_data(void) > WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset); > #endif > >+ if (info->phys_base) >+ fprintf(info->file_vmcoreinfo, "%s%lu\n", STR_NUMBER("phys_base"), >+ info->phys_base); >+ if (info->kaslr_offset) >+ fprintf(info->file_vmcoreinfo, "%s%lx\n", STR_KERNELOFFSET, >+ info->kaslr_offset); >+ > /* > * write the source file of 1st kernel > */ >diff --git a/makedumpfile.h b/makedumpfile.h >index f48dc0b..db75379 100644 >--- a/makedumpfile.h >+++ b/makedumpfile.h >@@ -45,6 +45,7 @@ > #include "sadump_mod.h" > #include <pthread.h> > #include <semaphore.h> >+#include <inttypes.h> > > #define VMEMMAPSTART 0xffffea0000000000UL > #define BITS_PER_WORD 64 >@@ -1599,6 +1600,9 @@ struct symbol_table { > unsigned long long cpu_online_mask; > unsigned long long __cpu_online_mask; > unsigned long long kexec_crash_image; >+ unsigned long long divide_error; >+ unsigned long long idt_table; >+ unsigned long long saved_command_line; > > /* > * symbols on ppc64 arch >@@ -1960,7 +1964,7 @@ int iomem_for_each_line(char *match, int (*callback)(void *data, int nr, > unsigned long length), > void *data); > int is_bigendian(void); >- >+int get_symbol_info(void); > > /* > * for Xen extraction >diff --git a/sadump_info.c b/sadump_info.c >index 
7dd22e7..485fa80 100644 >--- a/sadump_info.c >+++ b/sadump_info.c >@@ -1035,6 +1035,410 @@ sadump_get_max_mapnr(void) > > #ifdef __x86_64__ > >+/* >+ * Get address of vector0 interrupt handler (Devide Error) form Interrupt >+ * Descriptor Table. >+ */ >+static unsigned long >+get_vec0_addr(ulong idtr) >+{ >+ struct gate_struct64 { >+ uint16_t offset_low; >+ uint16_t segment; >+ uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; >+ uint16_t offset_middle; >+ uint32_t offset_high; >+ uint32_t zero1; >+ } __attribute__((packed)) gate; >+ >+ readmem(PADDR, idtr, &gate, sizeof(gate)); >+ >+ return ((ulong)gate.offset_high << 32) >+ + ((ulong)gate.offset_middle << 16) >+ + gate.offset_low; >+} >+ >+/* >+ * Parse a string of [size[KMG]@]offset[KMG] >+ * Import from Linux kernel(lib/cmdline.c) >+ */ >+static ulong memparse(char *ptr, char **retptr) >+{ >+ char *endptr; >+ >+ unsigned long long ret = strtoull(ptr, &endptr, 0); >+ >+ switch (*endptr) { >+ case 'E': >+ case 'e': >+ ret <<= 10; >+ case 'P': >+ case 'p': >+ ret <<= 10; >+ case 'T': >+ case 't': >+ ret <<= 10; >+ case 'G': >+ case 'g': >+ ret <<= 10; >+ case 'M': >+ case 'm': >+ ret <<= 10; >+ case 'K': >+ case 'k': >+ ret <<= 10; >+ endptr++; >+ default: >+ break; >+ } >+ >+ if (retptr) >+ *retptr = endptr; >+ >+ return ret; >+} >+ >+/* >+ * Find "elfcorehdr=" in the boot parameter of kernel and return the address >+ * of elfcorehdr. 
>+ */ >+static ulong >+get_elfcorehdr(ulong cr3) >+{ >+ char cmdline[BUFSIZE], *ptr; >+ ulong cmdline_vaddr; >+ ulong cmdline_paddr; >+ ulong buf_vaddr, buf_paddr; >+ char *end; >+ ulong elfcorehdr_addr = 0, elfcorehdr_size = 0; >+ >+ if (SYMBOL(saved_command_line) == NOT_FOUND_SYMBOL) { >+ ERRMSG("Can't get the symbol of saved_command_line.\n"); >+ return 0; >+ } >+ cmdline_vaddr = SYMBOL(saved_command_line); >+ if ((cmdline_paddr = vtop4_x86_64_pagetable(cmdline_vaddr, cr3)) == NOT_PADDR) >+ return 0; >+ >+ DEBUG_MSG("sadump: cmdline vaddr: %lx\n", cmdline_vaddr); >+ DEBUG_MSG("sadump: cmdline paddr: %lx\n", cmdline_paddr); >+ >+ if (!readmem(PADDR, cmdline_paddr, &buf_vaddr, sizeof(ulong))) >+ return 0; >+ >+ if ((buf_paddr = vtop4_x86_64_pagetable(buf_vaddr, cr3)) == NOT_PADDR) >+ return 0; >+ >+ DEBUG_MSG("sadump: cmdline buf vaddr: %lx\n", buf_vaddr); >+ DEBUG_MSG("sadump: cmdline buf paddr: %lx\n", buf_paddr); >+ >+ memset(cmdline, 0, BUFSIZE); >+ if (!readmem(PADDR, buf_paddr, cmdline, BUFSIZE)) >+ return 0; >+ >+ ptr = strstr(cmdline, "elfcorehdr="); >+ if (!ptr) >+ return 0; >+ >+ DEBUG_MSG("sadump: 2nd kernel detected.\n"); >+ >+ ptr += strlen("elfcorehdr="); >+ elfcorehdr_addr = memparse(ptr, &end); >+ if (*end == '@') { >+ elfcorehdr_size = elfcorehdr_addr; >+ elfcorehdr_addr = memparse(end + 1, &end); >+ } >+ >+ DEBUG_MSG("sadump: elfcorehdr_addr: %lx\n", elfcorehdr_addr); >+ DEBUG_MSG("sadump: elfcorehdr_size: %lx\n", elfcorehdr_size); >+ >+ return elfcorehdr_addr; >+} >+ >+/* >+ * Get vmcoreinfo from elfcorehdr. 
>+ * Some codes are imported from Linux kernel(fs/proc/vmcore.c) >+ */ >+static int >+get_vmcoreinfo_in_kdump_kernel(ulong elfcorehdr, ulong *addr, int *len) >+{ >+ unsigned char e_ident[EI_NIDENT]; >+ Elf64_Ehdr ehdr; >+ Elf64_Phdr phdr; >+ Elf64_Nhdr nhdr; >+ ulong ptr; >+ ulong nhdr_offset = 0; >+ int i; >+ >+ if (!readmem(PADDR, elfcorehdr, e_ident, EI_NIDENT)) >+ return FALSE; >+ >+ if (e_ident[EI_CLASS] != ELFCLASS64) { >+ ERRMSG("Only ELFCLASS64 is supportd\n"); >+ return FALSE; >+ } >+ >+ if (!readmem(PADDR, elfcorehdr, &ehdr, sizeof(ehdr))) >+ return FALSE; >+ >+ /* Sanity Check */ >+ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || >+ (ehdr.e_type != ET_CORE) || >+ ehdr.e_ident[EI_CLASS] != ELFCLASS64 || >+ ehdr.e_ident[EI_VERSION] != EV_CURRENT || >+ ehdr.e_version != EV_CURRENT || >+ ehdr.e_ehsize != sizeof(Elf64_Ehdr) || >+ ehdr.e_phentsize != sizeof(Elf64_Phdr) || >+ ehdr.e_phnum == 0) { >+ ERRMSG("Invalid elf header\n"); >+ return FALSE; >+ } >+ >+ ptr = elfcorehdr + ehdr.e_phoff; >+ for (i = 0; i < ehdr.e_phnum; i++) { >+ ulong offset; >+ char name[16]; >+ >+ if (!readmem(PADDR, ptr, &phdr, sizeof(phdr))) >+ return FALSE; >+ >+ ptr += sizeof(phdr); >+ if (phdr.p_type != PT_NOTE) >+ continue; >+ >+ offset = phdr.p_offset; >+ if (!readmem(PADDR, offset, &nhdr, sizeof(nhdr))) >+ return FALSE; >+ >+ offset += divideup(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))* >+ sizeof(Elf64_Word); >+ memset(name, 0, sizeof(name)); >+ if (!readmem(PADDR, offset, name, sizeof(name))) >+ return FALSE; >+ >+ if(!strcmp(name, "VMCOREINFO")) { >+ nhdr_offset = offset; >+ break; >+ } >+ } >+ >+ if (!nhdr_offset) >+ return FALSE; >+ >+ *addr = nhdr_offset + >+ divideup(nhdr.n_namesz, sizeof(Elf64_Word))* >+ sizeof(Elf64_Word); >+ *len = nhdr.n_descsz; >+ >+ DEBUG_MSG("sadump: vmcoreinfo addr: %lx\n", *addr); >+ DEBUG_MSG("sadump: vmcoreinfo len: %d\n", *len); >+ >+ return TRUE; >+} >+ >+/* >+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel. 
>+ * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo. >+ * >+ * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter >+ * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel. >+ * There is nothing to do. >+ * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo >+ * using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo. >+ */ >+int >+get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong *kaslr_offset, >+ ulong *phys_base) >+{ >+ ulong elfcorehdr_addr = 0; >+ ulong vmcoreinfo_addr; >+ int vmcoreinfo_len; >+ char *buf, *pos; >+ int ret = FALSE; >+ >+ elfcorehdr_addr = get_elfcorehdr(cr3); >+ if (!elfcorehdr_addr) >+ return FALSE; >+ >+ if (!get_vmcoreinfo_in_kdump_kernel(elfcorehdr_addr, &vmcoreinfo_addr, >+ &vmcoreinfo_len)) >+ return FALSE; >+ >+ if (!vmcoreinfo_len) >+ return FALSE; >+ >+ DEBUG_MSG("sadump: Find vmcoreinfo in kdump memory\n"); >+ >+ if (!(buf = malloc(vmcoreinfo_len))) { >+ ERRMSG("Can't allocate vmcoreinfo buffer.\n"); >+ return FALSE; >+ } >+ >+ if (!readmem(PADDR, vmcoreinfo_addr, buf, vmcoreinfo_len)) >+ goto finish; >+ >+ pos = strstr(buf, STR_NUMBER("phys_base")); >+ if (!pos) >+ goto finish; >+ *phys_base = strtoull(pos + strlen(STR_NUMBER("phys_base")), NULL, 0); >+ >+ pos = strstr(buf, STR_KERNELOFFSET); >+ if (!pos) >+ goto finish; >+ *kaslr_offset = strtoull(pos + strlen(STR_KERNELOFFSET), NULL, 16); >+ ret = TRUE; >+ >+finish: >+ free(buf); >+ return ret; >+} >+ >+/* >+ * Calculate kaslr_offset and phys_base >+ * >+ * kaslr_offset: >+ * The difference between original address in vmlinux and actual address >+ * placed randomly by kaslr feature. To be more accurate, >+ * kaslr_offset = actual address - original address >+ * >+ * phys_base: >+ * Physical address where the kerenel is placed. In other words, it's a >+ * physical address of __START_KERNEL_map. This is also decided randomly by >+ * kaslr. 
>+ * >+ * kaslr offset and phys_base are calculated as follows: >+ * >+ * kaslr_offset: >+ * 1) Get IDTR and CR3 value from the dump header. >+ * 2) Get a virtual address of IDT from IDTR value >+ * --- (A) >+ * 3) Translate (A) to physical address using CR3, which points a top of >+ * page table. >+ * --- (B) >+ * 4) Get an address of vector0 (Devide Error) interrupt handler from >+ * IDT, which are pointed by (B). >+ * --- (C) >+ * 5) Get an address of symbol "divide_error" form vmlinux >+ * --- (D) >+ * >+ * Now we have two addresses: >+ * (C)-> Actual address of "divide_error" >+ * (D)-> Original address of "divide_error" in the vmlinux >+ * >+ * kaslr_offset can be calculated by the difference between these two >+ * value. >+ * >+ * phys_base; >+ * 1) Get IDT virtual address from vmlinux >+ * --- (E) >+ * >+ * So phys_base can be calculated using relationship of directly mapped >+ * address. >+ * >+ * phys_base = >+ * Physical address(B) - >+ * (Virtual address(E) + kaslr_offset - __START_KERNEL_map) >+ * >+ * Note that the address (A) cannot be used instead of (E) because (A) is >+ * not direct map address, it's a fixed map address. >+ * >+ * This solution works in most every case, but does not work in the >+ * following case. >+ * >+ * 1) If the dump is captured on early stage of kernel boot, IDTR points >+ * early IDT table(early_idts) instead of normal IDT(idt_table). >+ * 2) If the dump is captured whle kdump is working, IDTR points >+ * IDT table of 2nd kernel, not 1st kernel. These cases sound like they apply only to outside dump mechanisms like sadump, right? I think the functions for case 2) are an extra feature, while calculating kaslr_offset is the essential solution for the KASLR problem. I hope you split this patch in two since it's large. Concretely, >+ * >+ * Current implementation does not support the case 1), need >+ * enhancement in the future. For the case 2), get kaslr_offset and >+ * phys_base as follows. 
>+ * >+ * 1) Get kaslr_offset and phys_base using the above solution. >+ * 2) Get kernel boot parameter from "saved_command_line" >+ * 3) If "elfcorehdr=" is not included in boot parameter, we are in the >+ * first kernel, nothing to do any more. >+ * 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd >+ * kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and >+ * get kaslr_offset and phys_base from vmcoreinfo. >+ */ 1) -> [PATCH 3/4] 2)-4) -> [PATCH 4/4] Thanks, Atsushi Kumagai >+int >+calc_kaslr_offset(void) >+{ >+ struct sadump_header *sh = si->sh_memory; >+ uint64_t idtr = 0, cr3 = 0, idtr_paddr; >+ struct sadump_smram_cpu_state smram, zero; >+ int apicid; >+ unsigned long divide_error_vmcore, divide_error_vmlinux; >+ >+ unsigned long kaslr_offset_kdump, phys_base_kdump; >+ unsigned long kaslr_offset, phys_base; >+ >+ memset(&zero, 0, sizeof(zero)); >+ for (apicid = 0; apicid < sh->nr_cpus; ++apicid) { >+ if (!get_smram_cpu_state(apicid, &smram)) { >+ ERRMSG("get_smram_cpu_state error\n"); >+ return FALSE; >+ } >+ >+ if (memcmp(&smram, &zero, sizeof(smram)) != 0) >+ break; >+ } >+ if (apicid >= sh->nr_cpus) { >+ ERRMSG("Can't get smram state\n"); >+ return FALSE; >+ } >+ >+ idtr = ((uint64_t)smram.IdtUpper)<<32 | (uint64_t)smram.IdtLower; >+ cr3 = smram.Cr3; >+ >+ /* Convert virtual address of IDT table to physical address */ >+ if ((idtr_paddr = vtop4_x86_64_pagetable(idtr, cr3)) == NOT_PADDR) >+ return FALSE; >+ >+ /* Now we can calculate kaslr_offset and phys_base */ >+ divide_error_vmlinux = SYMBOL(divide_error); >+ divide_error_vmcore = get_vec0_addr(idtr_paddr); >+ kaslr_offset = divide_error_vmcore - divide_error_vmlinux; >+ phys_base = idtr_paddr - >+ (SYMBOL(idt_table) + kaslr_offset - __START_KERNEL_map); >+ >+ info->kaslr_offset = kaslr_offset; >+ info->phys_base = phys_base; >+ >+ DEBUG_MSG("sadump: idtr=%" PRIx64 "\n", idtr); >+ DEBUG_MSG("sadump: cr3=%" PRIx64 "\n", cr3); >+ DEBUG_MSG("sadump: idtr(phys)=%" PRIx64 "\n", 
idtr_paddr); >+ DEBUG_MSG("sadump: devide_error(vmlinux)=%lx\n", >+ divide_error_vmlinux); >+ DEBUG_MSG("sadump: devide_error(vmcore)=%lx\n", >+ divide_error_vmcore); >+ >+ /* Reload symbol */ >+ if (!get_symbol_info()) >+ return FALSE; >+ >+ /* >+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd >+ * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base >+ * from vmcoreinfo >+ */ >+ if (get_kaslr_offset_from_vmcoreinfo(cr3, &kaslr_offset_kdump, >+ &phys_base_kdump)) { >+ info->kaslr_offset = kaslr_offset_kdump; >+ info->phys_base = phys_base_kdump; >+ >+ /* Reload symbol */ >+ if (!get_symbol_info()) >+ return FALSE; >+ } >+ >+ DEBUG_MSG("sadump: kaslr_offset=%lx\n", info->kaslr_offset); >+ DEBUG_MSG("sadump: phys_base=%lx\n", info->phys_base); >+ >+ return TRUE; >+} >+ > int > sadump_virt_phys_base(void) > { >@@ -1065,6 +1469,9 @@ sadump_virt_phys_base(void) > } > > failed: >+ if (calc_kaslr_offset()) >+ return TRUE; >+ > info->phys_base = 0; > > DEBUG_MSG("sadump: failed to calculate phys_base; default to 0\n"); >@@ -1518,10 +1925,14 @@ cpu_to_apicid(int cpu, int *apicid) > if (!readmem(VADDR, SYMBOL(x86_bios_cpu_apicid_early_ptr), > &early_ptr, sizeof(early_ptr))) > return FALSE; >- >+ /* >+ * Note: SYMBOL(name) value is adjusted by info->kaslr_offset, >+ * but per_cpu symbol does not need to be adjusted becasue it >+ * is not affected by kaslr. >+ */ > apicid_addr = early_ptr > ? SYMBOL(x86_bios_cpu_apicid_early_map)+cpu*sizeof(uint16_t) >- : per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid), cpu); >+ : per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid) - info->kaslr_offset, cpu); > > if (!readmem(VADDR, apicid_addr, &apicid_u16, sizeof(uint16_t))) > return FALSE; >-- >2.9.5