The current code in readpage_elf (and readpage_elf_parallel) is extremely hard to follow. Additionally, it still does not cover all possible cases. For example, attempts to read outside of any ELF segment will end up with phys_start being 0, frac_head a negative number, interpreted as a large positive number by memset() and write past buffer end. Instead of trying to handle even more "corner cases", I rewrote the algorithm from scratch. The basic idea is simple: set a goal to fill the page buffer with data, then work towards that goal by: - filling holes with zeroes (see Note below), - p_filesz portions with file data and - remaining p_memsz portions again with zeroes. Repeat this method for each LOAD until the goal is achieved, or an error occurs. In most cases, the loop runs only once. Note: A "hole" is data at a physical address that is not covered by any ELF LOAD program header. In other words, the ELF file does not specify any data for such a hole (not even zeroes). So, why does makedumpfile fill them with zeroes? It's because makedumpfile works with page granularity (the compressed format does not even have a way to store a partial page), so if only part of a page is stored, a complete page must be provided to make this partial data accessible. Credits to Ivan Delalande <colona at arista.com> who first found the problem and wrote the original fix. Signed-off-by: Petr Tesarik <ptesarik at suse.com> --- elf_info.c | 28 +++++++ elf_info.h | 1 makedumpfile.c | 208 ++++++++++++++++++++++----------------------------------- 3 files changed, 110 insertions(+), 127 deletions(-) --- a/elf_info.c +++ b/elf_info.c @@ -691,6 +691,34 @@ get_max_paddr(void) return max_paddr; } +/* + * Find the LOAD segment which is closest to the requested + * physical address within a given distance. + * If there is no such segment, return a negative number. + */ +int +closest_pt_load(unsigned long long paddr, unsigned long distance) +{ + int i, bestidx; + struct pt_load_segment *pls; + unsigned long bestdist; + + bestdist = distance; + bestidx = -1; + for (i = 0; i < num_pt_loads; ++i) { + pls = &pt_loads[i]; + if (paddr >= pls->phys_end) + continue; + if (paddr >= pls->phys_start) + return i; /* Exact match */ + if (bestdist > pls->phys_start - paddr) { + bestdist = pls->phys_start - paddr; + bestidx = i; + } + } + return bestidx; +} + int get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr) { --- a/elf_info.h +++ b/elf_info.h @@ -39,6 +39,7 @@ off_t offset_to_pt_load_end(off_t offset unsigned long long vaddr_to_paddr_general(unsigned long long vaddr); off_t vaddr_to_offset_slow(int fd, char *filename, unsigned long long vaddr); unsigned long long get_max_paddr(void); +int closest_pt_load(unsigned long long paddr, unsigned long distance); int page_is_fractional(off_t page_offset); --- a/makedumpfile.c +++ b/makedumpfile.c @@ -648,73 +648,51 @@ read_from_vmcore_parallel(int fd_memory, static int readpage_elf(unsigned long long paddr, void *bufptr) { - off_t offset1, offset2; - size_t size1, size2; - unsigned long long phys_start, phys_end, frac_head = 0; - - offset1 = paddr_to_offset(paddr); - offset2 = paddr_to_offset(paddr + info->page_size); - phys_start = paddr; - phys_end = paddr + info->page_size; - - /* - * Check the case phys_start isn't aligned by page size like below: - * - * phys_start - * = 0x40ffda7000 - * |<-- frac_head -->|------------- PT_LOAD ------------- - * ----+-----------------------+---------------------+---- - * | pfn:N | pfn:N+1 | ... - * ----+-----------------------+---------------------+---- - * | - * pfn_to_paddr(pfn:N) # page size = 16k - * = 0x40ffda4000 - */ - if (!offset1) { - phys_start = page_head_to_phys_start(paddr); - offset1 = paddr_to_offset(phys_start); - frac_head = phys_start - paddr; - memset(bufptr, 0, frac_head); - } - - /* - * Check the case phys_end isn't aligned by page size like the - * phys_start's case. - */ - if (!offset2) { - phys_end = page_head_to_phys_end(paddr); - offset2 = paddr_to_offset(phys_end); - memset(bufptr + (phys_end - paddr), 0, info->page_size - (phys_end - paddr)); - } + int idx; + off_t offset, size; + void *p, *endp; + unsigned long long phys_start, phys_end; + + p = bufptr; + endp = p + info->page_size; + while (p < endp) { + idx = closest_pt_load(paddr + (p - bufptr), endp - p); + if (idx < 0) + break; + + get_pt_load_extents(idx, &phys_start, &phys_end, &offset, &size); + if (phys_start > paddr + (p - bufptr)) { + memset(p, 0, phys_start - paddr); + p += phys_start - paddr; + } - /* - * Check the separated page on different PT_LOAD segments. - */ - if (offset1 + (phys_end - phys_start) == offset2) { - size1 = phys_end - phys_start; - } else { - for (size1 = 1; size1 < info->page_size - frac_head; size1++) { - offset2 = paddr_to_offset(phys_start + size1); - if (offset1 + size1 != offset2) - break; + offset += paddr - phys_start; + if (size > paddr - phys_start) { + size -= paddr - phys_start; + if (size > endp - p) + size = endp - p; + if (!read_from_vmcore(offset, p, size)) { + ERRMSG("Can't read the dump memory(%s).\n", + info->name_memory); + return FALSE; + } + p += size; + } + if (p < endp) { + size = phys_end - paddr; + if (size > endp - p) + size = endp - p; + memset(p, 0, size); + p += size; } } - if(!read_from_vmcore(offset1, bufptr + frac_head, size1)) { - ERRMSG("Can't read the dump memory(%s).\n", - info->name_memory); + if (p == bufptr) { + ERRMSG("Attempt to read non-existent page at 0x%llx.\n", + paddr); return FALSE; - } - - if (size1 + frac_head != info->page_size) { - size2 = phys_end - (phys_start + size1); - - if(!read_from_vmcore(offset2, bufptr + frac_head + size1, size2)) { - ERRMSG("Can't read the dump memory(%s).\n", - info->name_memory); - return FALSE; - } - } + } else if (p < bufptr) + memset(p, 0, endp - p); return TRUE; } @@ -722,76 +700,52 @@ readpage_elf(unsigned long long paddr, v static int readpage_elf_parallel(int fd_memory, unsigned long long paddr, void *bufptr) { - off_t offset1, offset2; - size_t size1, size2; - unsigned long long phys_start, phys_end, frac_head = 0; - - offset1 = paddr_to_offset(paddr); - offset2 = paddr_to_offset(paddr + info->page_size); - phys_start = paddr; - phys_end = paddr + info->page_size; - - /* - * Check the case phys_start isn't aligned by page size like below: - * - * phys_start - * = 0x40ffda7000 - * |<-- frac_head -->|------------- PT_LOAD ------------- - * ----+-----------------------+---------------------+---- - * | pfn:N | pfn:N+1 | ... - * ----+-----------------------+---------------------+---- - * | - * pfn_to_paddr(pfn:N) # page size = 16k - * = 0x40ffda4000 - */ - if (!offset1) { - phys_start = page_head_to_phys_start(paddr); - offset1 = paddr_to_offset(phys_start); - frac_head = phys_start - paddr; - memset(bufptr, 0, frac_head); - } - - /* - * Check the case phys_end isn't aligned by page size like the - * phys_start's case. - */ - if (!offset2) { - phys_end = page_head_to_phys_end(paddr); - offset2 = paddr_to_offset(phys_end); - memset(bufptr + (phys_end - paddr), 0, info->page_size - - (phys_end - paddr)); - } + int idx; + off_t offset, size; + void *p, *endp; + unsigned long long phys_start, phys_end; + + p = bufptr; + endp = p + info->page_size; + while (p < endp) { + idx = closest_pt_load(paddr + (p - bufptr), endp - p); + if (idx < 0) + break; + + get_pt_load_extents(idx, &phys_start, &phys_end, &offset, &size); + if (phys_start > paddr + (p - bufptr)) { + memset(p, 0, phys_start - paddr); + p += phys_start - paddr; + } - /* - * Check the separated page on different PT_LOAD segments. - */ - if (offset1 + (phys_end - phys_start) == offset2) { - size1 = phys_end - phys_start; - } else { - for (size1 = 1; size1 < info->page_size - frac_head; size1++) { - offset2 = paddr_to_offset(phys_start + size1); - if (offset1 + size1 != offset2) - break; + offset += paddr - phys_start; + if (size > paddr - phys_start) { + size -= paddr - phys_start; + if (size > endp - p) + size = endp - p; + if (!read_from_vmcore_parallel(fd_memory, offset, bufptr, + size)) { + ERRMSG("Can't read the dump memory(%s).\n", + info->name_memory); + return FALSE; + } + p += size; + } + if (p < endp) { + size = phys_end - paddr; + if (size > endp - p) + size = endp - p; + memset(p, 0, size); + p += size; } } - if(!read_from_vmcore_parallel(fd_memory, offset1, bufptr + frac_head, - size1)) { - ERRMSG("Can't read the dump memory(%s).\n", - info->name_memory); + if (p == bufptr) { + ERRMSG("Attempt to read non-existent page at 0x%llx.\n", + paddr); return FALSE; - } - - if (size1 + frac_head != info->page_size) { - size2 = phys_end - (phys_start + size1); - - if(!read_from_vmcore_parallel(fd_memory, offset2, - bufptr + frac_head + size1, size2)) { - ERRMSG("Can't read the dump memory(%s).\n", - info->name_memory); - return FALSE; - } - } + } else if (p < bufptr) + memset(p, 0, endp - p); return TRUE; }