-----Original Message----- > Hi, Roman > Thank you for the improvement. > > > kdump format description [1] says: > > > > [...] zero page has its own offset not equal 0. So when reading page > > from incomplete core, only the page lost by ENOSPACE errors has 0 in its > > corresponding page descriptor's member offset. > > > > crash has special treatment for page descriptors with zero offset only if > > DUMP_DH_COMPRESSED_INCOMPLETE is set in dump header. However, > > makedumpfile places the flag after ENOSPC is hit and only if dump header > > modification went without errors. > > > > In case if crashkernel environment was terminated early (e.g. by BMC) or > > some other reason, DUMP_DH_COMPRESSED_INCOMPLETE won't be set on the > > dump header. Then cache_page() would be performed on pages with > > pd.offset == 0 and due to pd.size == 0 it'll skip read into > > compressed_page and then non related pre-existing contents of > > compressed_page will copied into page cache for the non-present page. > > > > Ultimately, it'll lead to a cryptic failure, like: > > > > crash: invalid kernel virtual address: 72288cacacf427f8 [...] > > > > The failure would be a bit cleaner if crash explicitly fails on the page > > that is an outcome of incomplete dump: > > > > crash: page incomplete: kernel virtual address: c000003fff9d17e8 [...] > > > > Debugging level 8 would also produce exact offset from data_offset to > > print descriptor value with ease: > > > > read_diskdump/cache_page: descriptor with zero offset found at paddr/pfn/pos: 3fff9d0000/3fff9d/743dd > > > > That helps in inspecting broken descriptor with hexdump or similar tools: > > > > hexdump -s (data_offset + pos * 0x18) -n 0x18 Looks good. > > > > 1. https://github.com/makedumpfile/makedumpfile/blob/master/IMPLEMENTATION > > > > Signed-off-by: Roman Bolshakov <r.bolshakov@xxxxxxxxx> > > --- > > defs.h | 1 + > > diskdump.c | 13 ++++++++++--- > > memory.c | 7 +++++++ > > 3 files changed, 18 insertions(+), 3 deletions(-) > > > > diff --git a/defs.h b/defs.h > > index 396d61a..8418da2 100644 > > --- a/defs.h > > +++ b/defs.h > > @@ -361,6 +361,7 @@ struct number_option { > > #define READ_ERROR (-2) > > #define WRITE_ERROR (-3) > > #define PAGE_EXCLUDED (-4) > > +#define PAGE_INCOMPLETE (-5) > > > > #define RESTART() (longjmp(pc->main_loop_env, 1)) > > #define RESUME_FOREACH() (longjmp(pc->foreach_loop_env, 1)) > > diff --git a/diskdump.c b/diskdump.c > > index 3effb52..c05f1ec 100644 > > --- a/diskdump.c > > +++ b/diskdump.c > > @@ -1146,7 +1146,7 @@ cache_page(physaddr_t paddr) > > if (FLAT_FORMAT()) { > > if (!read_flattened_format(dd->dfd, pd.offset, dd->compressed_page, pd.size)) > > return READ_ERROR; > > - } else if (is_incomplete_dump() && (0 == pd.offset)) { > > + } else if (0 == pd.offset) { > > /* > > * If the incomplete flag has been set in the header, > > * first check whether zero_excluded has been set. > > Does it still make sense to check the incomplete flag for the > zero_excluded case? Otherwise, the above code comment will be > outdated. > + } else if (0 == pd.offset) { > ... > - if (*diskdump_flags & ZERO_EXCLUDED) { > + if (is_incomplete_dump() && (*diskdump_flags & ZERO_EXCLUDED)) { Given the fact that makedumpfile cannot mark a dump as incomplete every time it fails, I think it might be good to be able to use zero_excluded option also without the incomplete flag. So I'm ok with the original change, but additionally would like to - fix the comment Lianbo pointed out and help texts of zero_excluded - change netdump.c as well Any concerns? Thanks, Kazu > > Thanks. > Lianbo > > > @@ -1158,8 +1158,15 @@ cache_page(physaddr_t paddr) > > "paddr/pfn: %llx/%lx\n", > > (ulonglong)paddr, pfn); > > memset(dd->compressed_page, 0, dd->block_size); > > - } else > > - return READ_ERROR; > > + } else { > > + if (CRASHDEBUG(8)) > > + fprintf(fp, > > + "read_diskdump/cache_page: " > > + "descriptor with zero offset found at " > > + "paddr/pfn/pos: %llx/%lx/%lx\n", > > + (ulonglong)paddr, pfn, desc_pos); > > + return PAGE_INCOMPLETE; > > + } > > } else { > > if (lseek(dd->dfd, pd.offset, SEEK_SET) == failed) > > return SEEK_ERROR; > > diff --git a/memory.c b/memory.c > > index 8c6bbe4..5d7eee6 100644 > > --- a/memory.c > > +++ b/memory.c > > @@ -2211,6 +2211,7 @@ accessible(ulong kva) > > #define READ_ERRMSG "read error: %s address: %llx type: \"%s\"\n" > > #define WRITE_ERRMSG "write error: %s address: %llx type: \"%s\"\n" > > #define PAGE_EXCLUDED_ERRMSG "page excluded: %s address: %llx type: \"%s\"\n" > > +#define PAGE_INCOMPLETE_ERRMSG "page incomplete: %s address: %llx type: \"%s\"\n" > > > > #define RETURN_ON_PARTIAL_READ() \ > > if ((error_handle & RETURN_PARTIAL) && (size < orig_size)) { \ > > @@ -2376,6 +2377,12 @@ readmem(ulonglong addr, int memtype, void *buffer, long size, > > error(INFO, PAGE_EXCLUDED_ERRMSG, memtype_string(memtype, 0), addr, > type); > > goto readmem_error; > > > > + case PAGE_INCOMPLETE: > > + RETURN_ON_PARTIAL_READ(); > > + if (PRINT_ERROR_MESSAGE) > > + error(INFO, PAGE_INCOMPLETE_ERRMSG, memtype_string(memtype, 0), addr, > type); > > + goto readmem_error; > > + > > default: > > break; > > } > > -- > > 2.31.1 > > -- > Crash-utility mailing list > Crash-utility@xxxxxxxxxx > https://listman.redhat.com/mailman/listinfo/crash-utility -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility