Hi Dave, Thanks for your comments. (2012/06/19 1:01), Dave Anderson wrote: > I don't want to add any new initialization-time code -- especially if > it's related to the NT_PRSTATUS notes -- that can abort a crash session > unnecessarily. In your new crash_was_lost_crash_note() function, there > are these two FAULT_ON_ERROR readmem() calls: > > readmem(symbol_value("crash_notes"), KVADDR,&crash_notes_ptr, > sizeof(ulong), "crash_notes", FAULT_ON_ERROR); > and > > readmem(crash_notes_ptr, KVADDR, buf, SIZE(note_buf), > "cpu crash_notes", FAULT_ON_ERROR); > > Although they are highly unlikely to fail, can you please make > both of them RETURN_ON_ERROR, and if the readmem() fails, have > it bail out and return FALSE? I see, I'll make consideration about initialization-time rule from now, to keep crash session, should use readmem() with QUIET|RETURN_ON_ERROR. > And then, if necessary, make any > adjustments to map_cpus_to_prstatus_kdump_cmprs() to handle that > remote possibility. You should be able to test it with your > patched kernel. I'm not able to test for unexpected, deliberate readmem() failure because my core file contains "crash_notes" memory field... Although I've probably misunderstood your advice, I had better add more debugging messages into map_cpus_to_prstatus_kdump_cmprs() to handle conditions where imply "not saved NT_PRSTATUS" or "nt_prstatus_percpu[] is remapped". > Also, I don't understand the wording of this error message > at the end of crash_was_lost_crash_note(): > > error(WARNING, "catch lost crash_notes at cpu#%d\n", cpu); > > Can you re-word that? The notes were not "lost", but rather were > "not saved" by the crashing system. I re-word all "lost" keywords, so too function name with "saved". And also make this warning valid only when CRASHDEBUG() is enable because we can check it later by using "help -D". > Lastly, in __diskdump_memory_dump(), you just skip the "lost" > notes sections: > > for (i = 0, j = 0; j< dd->num_prstatus_notes; i++) { > if (dd->nt_prstatus_percpu[i] == NULL) > continue; > fprintf(fp, " notes[%d]: %lx\n", > i, (ulong)dd->nt_prstatus_percpu[i]); > j++; > } > > Can you make it more obvious, say, by displaying something like: > > notes[6]: (not saved) Looks better, thanks for giving good hint. [updates by attached patch] - display messages only if CRASHDEBUG() >= 1 crash -d 1 => display about NT_PRSTATUS mapping messages as below. ---------------- WARNING: cpu#0: not saved crash_notes WARNING: cpu#1: not saved crash_notes crash: NT_PRSTATUS: cpu#2 = 107a34a8 (remapped from cpu#0) WARNING: cpu#3: not saved crash_notes WARNING: cpu#4: not saved crash_notes WARNING: cpu#5: not saved crash_notes WARNING: cpu#6: not saved crash_notes WARNING: cpu#7: not saved crash_notes ---------------- - help message is enhanced crash> help -D | grep notes num_prstatus_notes: 1 notes_buf: 107a34a8 notes[0]: (not saved) notes[1]: (not saved) notes[2]: 107a34a8 notes[3]: (not saved) notes[4]: (not saved) notes[5]: (not saved) notes[6]: (not saved) notes[7]: (not saved) Thanks, Toshi > Thanks, > Dave > > > -- > Crash-utility mailing list > Crash-utility@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/crash-utility >
Date: Tue, 19 Jun 2012 11:50:10 +0900 Subject: [PATCH] update NT_PRSTATUS issue fixed up code - Keep crash session even if readmem() is failed. - Rename cpu_was_lost_crash_note() to cpu_saved_crash_note() with inverting return value. - Move warning message to map_cpus_to_prstatus_kdump_cmprs() with re-word into new one and CRASHDEBUG(1) condition. - Add CRASHDEBUG(1) message to map_cpus_to_prstatus_kdump_cmprs() if NT_PRSTATUS is remapped from different cpu. - __diskdump_memory_dump() show "not saved" message for all valid cpus which don't own NT_PRSTATUS. Signed-off-by: Toshikazu Nakayama <nakayama.ts@xxxxxxxxxxxxxx> --- diskdump.c | 50 +++++++++++++++++++++++++++++++++++--------------- 1 files changed, 35 insertions(+), 15 deletions(-) diff --git a/diskdump.c b/diskdump.c index 4bf8560..909ce47 100644 --- a/diskdump.c +++ b/diskdump.c @@ -91,13 +91,13 @@ int dumpfile_is_split(void) } static int -cpu_was_lost_crash_note(int cpu) +cpu_saved_crash_note(int cpu) { int ret; ulong crash_notes_ptr; char *buf, *name; - ret = FALSE; + ret = TRUE; if (cpu < 0 || cpu > NR_CPUS) error(FATAL, "cpu#%d is out of range\n", cpu); @@ -106,16 +106,21 @@ cpu_was_lost_crash_note(int cpu) !VALID_STRUCT(note_buf) || !VALID_STRUCT(elf_prstatus)) goto out; - readmem(symbol_value("crash_notes"), KVADDR, &crash_notes_ptr, - sizeof(ulong), "crash_notes", FAULT_ON_ERROR); + if (!readmem(symbol_value("crash_notes"), KVADDR, &crash_notes_ptr, + sizeof(ulong), "crash_notes", QUIET|RETURN_ON_ERROR)) + goto out; if (!crash_notes_ptr) goto out; buf = GETBUF(SIZE(note_buf)); if ((kt->flags & SMP) && (kt->flags &PER_CPU_OFF)) crash_notes_ptr += kt->__per_cpu_offset[cpu]; - readmem(crash_notes_ptr, KVADDR, buf, SIZE(note_buf), - "cpu crash_notes", FAULT_ON_ERROR); + if (!readmem(crash_notes_ptr, KVADDR, buf, SIZE(note_buf), + "cpu crash_notes", QUIET|RETURN_ON_ERROR)) { + FREEBUF(buf); + goto out; + } + if (BITS64()) { Elf64_Nhdr *note64; @@ -125,7 +130,7 @@ cpu_was_lost_crash_note(int cpu) note64->n_namesz != strlen("CORE") + 1 || strncmp(name, "CORE", note64->n_namesz) || note64->n_descsz != SIZE(elf_prstatus)) - ret = TRUE; + ret = FALSE; } else { Elf32_Nhdr *note32; @@ -135,12 +140,10 @@ cpu_was_lost_crash_note(int cpu) note32->n_namesz != strlen("CORE") + 1 || strncmp(name, "CORE", note32->n_namesz) || note32->n_descsz != SIZE(elf_prstatus)) - ret = TRUE; + ret = FALSE; } FREEBUF(buf); out: - if (ret) - error(WARNING, "catch lost crash_notes at cpu#%d\n", cpu); return ret; } @@ -172,8 +175,20 @@ map_cpus_to_prstatus_kdump_cmprs(void) nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS); for (i = 0, j = 0; i < nrcpus; i++) { - if (in_cpu_map(ONLINE, i) && !cpu_was_lost_crash_note(i)) + if (in_cpu_map(ONLINE, i)) { + if (!cpu_saved_crash_note(i)) { + if (CRASHDEBUG(1)) + error(WARNING, + "cpu#%d: not saved crash_notes\n", + i); + continue; + } + if (CRASHDEBUG(1) && (i != j)) + error(INFO, + "NT_PRSTATUS: cpu#%d = %lx (remapped from cpu#%d)\n", + i, (ulong)nt_ptr[j], j); dd->nt_prstatus_percpu[i] = nt_ptr[j++]; + } } FREEBUF(nt_ptr); @@ -1325,7 +1340,7 @@ dump_nt_prstatus_offset(FILE *fp) int __diskdump_memory_dump(FILE *fp) { - int i, j, others, dump_level; + int i, nrcpus, others, dump_level; struct disk_dump_header *dh; struct disk_dump_sub_header *dsh; struct kdump_sub_header *kdsh; @@ -1515,12 +1530,17 @@ __diskdump_memory_dump(FILE *fp) dd->num_prstatus_notes); fprintf(fp, " notes_buf: %lx\n", (ulong)dd->notes_buf); - for (i = 0, j = 0; j < dd->num_prstatus_notes; i++) { - if (dd->nt_prstatus_percpu[i] == NULL) + nrcpus = kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : + NR_CPUS; + for (i = 0; i < nrcpus; i++) { + if (dd->nt_prstatus_percpu[i] == NULL) { + fprintf(fp, + " notes[%d]: (not saved)\n", + i); continue; + } fprintf(fp, " notes[%d]: %lx\n", i, (ulong)dd->nt_prstatus_percpu[i]); - j++; } dump_nt_prstatus_offset(fp); } -- 1.7.0.4
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility