----- Original Message ----- > The kernel might have added a new elf-note of type NT_NOCOREDUMP for various > reasons. This patch teaches crash tool to look for the same inside a vmcore > before further analysis. If present, display the error description and exit > early. > > Signed-off-by: K.Prasad <prasad@xxxxxxxxxxxxxxxxxx> At this point, I'll admit I'm not sure I totally understand this patch or what the dumpfile header layout would look like. Your new "myload64" pointer is not pointing to a PT_LOAD, but rather the first PT_NOTE, so its name doesn't even make sense in that respect. And for that matter, I don't see why you didn't just use the currently existing nd->notes64 pointer, which points to the same place? Also, the re-definition of the currently-existing "size" value scares the hell out of me w/respect to backwards-compatibility. And lastly, if I'm not mistaken, when you do the realloc() of tmp_elf_header, it may return a different address -- so wouldn't nd->elf_header be left pointing to the old buffer? And by extension, stale pointer values could be left in nd->elf64, nd->num_pt_load_segments, nd->notes64, nd->load64, etc? I would rather that is_netdump() be left intact -- except for a call to a new "check_nocoredump()" function, one which does not tinker with the is_netdump() pointers, sizes, buffers, etc... Let that function do its own thing, and if it finds that there's no coredump, then it's not going to return and we're done. But 99.99% of the time, there will be a coredump, and your function will not have screwed around with any of is_netdump()'s bookkeeping. Anyway, when the feature is accepted upstream in the kernel, and then by makedumpfile, we'll revisit this. 
Thanks, Dave > --- > netdump.c | 136 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 files changed, 131 insertions(+), 5 deletions(-) > > diff --git a/netdump.c b/netdump.c > index 1e9960c..3b4edec 100644 > --- a/netdump.c > +++ b/netdump.c > @@ -95,6 +95,74 @@ map_cpus_to_prstatus(void) > } > > /* > + * Temporary definition of new elf-note type for compilation > purposes. > + * Not required when run on a new kernel containing this definition. > + */ > +#define NT_NOCOREDUMP 21 > + > +/* > + * Function to verify if the vmcore contains and elf-note of type > NT_NOCOREDUMP. > + * The kernel adds such an elf-note when it is known that the crash > is > + * triggered due to a reason that does not need analysis of the > entire kernel > + * memory dump (e.g. crash triggered due to a faulty memory DIMM). > + */ > +static void > +has_nt_nocoredump(void *note_ptr, unsigned long size_note) > +{ > + Elf32_Nhdr *note32 = NULL; > + Elf64_Nhdr *note64 = NULL; > + size_t tot, len = 0; > + int num = 0; > + > + for (tot = 0; tot < size_note; tot += len) { > + if (machine_type("X86_64")) { > + note64 = note_ptr + tot; > + /* > + * If vmcore is generated due to fatal hardware > + * errors (such as Machine Check Exception, we only have > + * a 'slim' crashdump. Don't analyse further, inform the > + * user about it and exit. > + */ > + if (note64->n_type == NT_NOCOREDUMP) { > + fprintf(fp, "\"System crashed due to a hardware" > + " memory error. No coredump" > + " available.\"\n"); > + > + /* Do we have an accompanying error message? 
*/ > + if (note64->n_descsz == 0) > + goto exit; > + fprintf(fp,"Nocoredump Reason: %s", > + (char *)note64 + sizeof(Elf64_Nhdr)); > + } > + > + len = sizeof(Elf64_Nhdr); > + len = roundup(len + note64->n_namesz, 4); > + len = roundup(len + note64->n_descsz, 4); > + } else if (machine_type("X86")) { > + note32 = note_ptr + tot; > + if (note32->n_type == NT_NOCOREDUMP) { > + fprintf(fp, "\"System crashed due to a hardware" > + " memory error. No coredump" > + " available.\"\n"); > + > + /* Do we have an accompanying error message? */ > + if (note32->n_descsz == 0) > + goto exit; > + > + fprintf(fp,"Nocoredump Reason: %s", > + (char *)note32 + sizeof(Elf32_Nhdr)); > +exit: > + clean_exit(0); > + } > + > + len = sizeof(Elf32_Nhdr); > + len = roundup(len + note32->n_namesz, 4); > + len = roundup(len + note32->n_descsz, 4); > + } > + } > +} > + > +/* > * Determine whether a file is a netdump/diskdump/kdump creation, > * and if TRUE, initialize the vmcore_data structure. > */ > @@ -103,12 +171,12 @@ is_netdump(char *file, ulong source_query) > { > int i, fd, swap; > Elf32_Ehdr *elf32; > - Elf32_Phdr *load32; > + Elf32_Phdr *load32, *myload32; > Elf64_Ehdr *elf64; > - Elf64_Phdr *load64; > + Elf64_Phdr *load64, *myload64; > char eheader[MIN_NETDUMP_ELF_HEADER_SIZE]; > char buf[BUFSIZE]; > - size_t size, len, tot; > + size_t size, mysize, len, tot; > Elf32_Off offset32; > Elf64_Off offset64; > ulong tmp_flags; > @@ -195,7 +263,10 @@ is_netdump(char *file, ulong source_query) > > load32 = (Elf32_Phdr *) > &eheader[sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)]; > - size = (size_t)load32->p_offset; > + myload32 = (Elf32_Phdr *) > + &eheader[sizeof(Elf32_Ehdr)]; > + > + size = (size_t)myload32->p_offset; > > if ((load32->p_offset & (MIN_PAGE_SIZE-1)) && > (load32->p_align == 0)) > @@ -249,7 +320,10 @@ is_netdump(char *file, ulong source_query) > > load64 = (Elf64_Phdr *) > &eheader[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)]; > - size = (size_t)load64->p_offset; > + myload64 = (Elf64_Phdr 
*) > + &eheader[sizeof(Elf64_Ehdr)]; > + > + size = (size_t)myload64->p_offset; > if ((load64->p_offset & (MIN_PAGE_SIZE-1)) && > (load64->p_align == 0)) > tmp_flags |= KDUMP_ELF64; > @@ -362,6 +436,58 @@ is_netdump(char *file, ulong source_query) > &nd->elf_header[sizeof(Elf64_Ehdr)]; > nd->load64 = (Elf64_Phdr *) > &nd->elf_header[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)]; > + /* > + * Find out if there exists an elf-note of type NT_NOCOREDUMP. > + * If so, exit early from crash analysis after displaying the > + * description string. > + * > + * Allocate a temporary buffer to store the PT_NOTE section and > + * loop through them to look for NT_NOCOREDUMP. > + */ > + for (i = 0; i < elf64->e_phnum; i++, myload64++) { > + mysize += myload64->p_memsz; > + if (myload64->p_type == PT_NOTE) { > + break; > + } > + } > + > + if (mysize == 0) { > + fprintf(stderr, "No PT_NOTE section found\n"); > + clean_exit(1); > + } > + > + /* > + * Size of the buffer should accommodate the Elf_Ehdr, Elf_Phdr > + * and all sections upto the first PT_NOTE. 
> + */ > + mysize += size; > + tmp_elf_header = realloc(tmp_elf_header, mysize); > + if (tmp_elf_header == NULL) { > + fprintf(stderr, "cannot malloc notes buffer\n"); > + clean_exit(1); > + } > + if (FLAT_FORMAT()) { > + if (!read_flattened_format(fd, 0, tmp_elf_header, mysize)) { > + free(tmp_elf_header); > + goto bailout; > + } > + } else { > + if (lseek(fd, 0, SEEK_SET) != 0) { > + sprintf(buf, "%s: lseek", file); > + perror(buf); > + goto bailout; > + } > + if (read(fd, tmp_elf_header, mysize) != mysize) { > + sprintf(buf, "%s: read", file); > + perror(buf); > + free(tmp_elf_header); > + goto bailout; > + } > + } > + > + has_nt_nocoredump((char *)tmp_elf_header + myload64->p_offset, > + myload64->p_memsz); > + > if (DUMPFILE_FORMAT(nd->flags) == NETDUMP_ELF64) > nd->page_size = (uint)nd->load64->p_align; > dump_Elf64_Ehdr(nd->elf64); > -- > 1.7.4.1 > > -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility