----- Original Message ----- > Hi Dave, > I have mistaken understand about the first point of view, thanks for correct my fault. > But how about my second point about the optimization for ARM64 getting crash notes? > 2) arm64_get_crash_notes() check the sanity of NT_PRSTATUS notes for online cpus. > If one cpu contains invalid note, it's better to continue finding the crash notes for other > online cpus. So we can extract the backtraces for the online cpus which contain valid note > by using command "bt -a". Hi Qiwu, Can you test the attached patch? It modifies your v3 patch by: (1) making the warning messages wholly contained within arm64_get_crash_notes(), which also makes it a static function (2) clarifying the warning messages (3) replacing your "j" counter with a simple "found" boolean. Thanks, Dave > > As a result without this change, for example, on an ELF-format kdump created > by arm64 kernel, > I can see the below warning messeges during crash initialization: > WARNING: invalid note (name != "CORE" > WARNING: cannot retrieve registers for active tasks > > From this dump, we can see online cpu number is 0,1,2,5,6,7: > crash> p -x __cpu_online_mask > __cpu_online_mask = $1 = { > bits = {0xe7} > } > > However, we cannot retrieve registers and backtraces for all active tasks by > using command "bt -a": > crash> bt -a > PID: 244 TASK: ffffffdcb7e40f80 CPU: 0 COMMAND: "hang_detect" > bt: WARNING: cannot determine starting stack frame for task ffffffdcb7e40f80 > > PID: 0 TASK: ffffffdd0066dd00 CPU: 1 COMMAND: "swapper/1" > bt: WARNING: cannot determine starting stack frame for task ffffffdd0066dd00 > > PID: 0 TASK: ffffffdd00669f00 CPU: 2 COMMAND: "swapper/2" > bt: WARNING: cannot determine starting stack frame for task ffffffdd00669f00 > > PID: 0 TASK: ffffffdd0066cd80 CPU: 3 COMMAND: "swapper/3" > bt: WARNING: cannot determine starting stack frame for task ffffffdd0066cd80 > > PID: 0 TASK: ffffffdd0066ae80 CPU: 4 COMMAND: "swapper/4" > bt: WARNING: cannot determine starting stack frame for task ffffffdd0066ae80 > Qiwu Qiwu > PID: 0 TASK: ffffffdd0066be00 CPU: 5 COMMAND: "swapper/5" > bt: WARNING: cannot determine starting stack frame for task ffffffdd0066be00 > > PID: 0 TASK: ffffffdd006f0000 CPU: 6 COMMAND: "swapper/6" > bt: WARNING: cannot determine starting stack frame for task ffffffdd006f0000 > > PID: 0 TASK: ffffffdd006f6c80 CPU: 7 COMMAND: "swapper/7" > bt: WARNING: cannot determine starting stack frame for task ffffffdd006f6c80 > > Since online cpu0 has invaid NT_PRSTATUS note, lead to skipping the reading > the crash notes for other online cpus: > crash> help -D | grep PC: > LR: 0000000000000000 SP: 0000000000000000 PC: > 0000000000000000 > LR: ffffffa5f58b8688 SP: ffffffdd3fe4ea40 PC: > ffffffa5f6932a10 > LR: ffffffa5f58b8688 SP: ffffffdd3fe6ca40 PC: > ffffffa5f6932a10 > LR: 0000000000000000 Qiwu SP: 0000000000000000 PC: > 0000000000000000 > LR: 0000000000000000 SP: 0000000000000000 PC: > 0000000000000000 > LR: ffffffa5f58b8688 SP: ffffffdd3fec6a40 PC: > ffffffa5f6932a10 > LR: ffffffa5f58b8688 SP: ffffffdd3fee4a40 PC: > ffffffa5f6932a10 > LR: ffffffa5f58b8688 SP: ffffffdd3ff02a40 PC: > ffffffa5f6932a10 > > I test for patch v3 which achieves the proper result. > crash> bt -a > PID: 244 TASK: ffffffdcb7e40f80 CPU: 0 COMMAND: "hang_detect" > bt: WARNING: cannot determine starting stack frame for task ffffffdcb7e40f80 > > PID: 0 TASK: ffffffdd0066dd00 CPU: 1 COMMAND: "swapper/1" > #0 [ffffffdd3fe4ea40] mrdump_stop_noncore_cpu at ffffffa5f6932a0c > #1 [ffffffdd3fe4ebc0] flush_smp_call_function_queue at ffffffa5f58b8684 > #2 [ffffffdd3fe4ec20] generic_smp_call_function_single_interrupt at > ffffffa5f58ba8c4 > #3 [ffffffdd3fe4ec30] handle_IPI at ffffffa5f56a8a10 > #4 [ffffffdd3fe4eca0] gic_handle_irq at ffffffa5f5682294 > --- <IRQ stack> --- Qiwu > #5 [ffffffdd006c7de0] el1_irq at ffffffa5f56844e4 > PC: ffffffa5f7120b28 [cpuidle_enter_state+336] > LR: ffffffa5f7120d70 [cpuidle_enter_state+920] > SP: ffffffdd006c7df0 PSTATE: 10c00145 > X29: ffffffdd006c7df0 X28: 0000000000000001 X27: 0000000000000001 > X26: ffffffa5fa04e000 X25: 0000000000000001 X24: 0000378c55cbccab > X23: ffffffa5f9970980 X22: 0000000000000001 X21: ffffffdcb6f6a400 > X20: ffffffa5fa04efa8 X19: 0000378c56210393 X18: 0000000000000000 > X17: 0000000000000000 X16: 0000000000000000 X15: 0000000000000000 > X14: ffffffdd0066dd00 X13: 00000037475a6000 X12: 000000003455591d > X11: 0000000000000000 X10: 0000000000001000 X9: 0000000000000000 > X8: ffffff8ba00d8f6c X7: 0000000000000000 X6: 1ffffff4bf57ac45 > X5: 00209246a095bf14 X4: 0000348a8d795b93 X3: 431bde82d7b634db > X2: 1ffffffba00cdba3 X1: 0000000000000000 X0: 0000000000000000 > #6 [ffffffdd006c7df0] cpuidle_enter_state at ffffffa5f7120b24 > #7 [ffffffdd006c7e70] cpuidle_enter at ffffffa5f712150c > #8 [ffffffdd006c7e80] call_cpuidle at ffffffa5 Qiwuf5805e24 > #9 [ffffffdd006c7eb0] do_idle at ffffffa5f5806320 > #10 [ffffffdd006c7f80] cpu_startup_entry at ffffffa5f5806910 > #11 [ffffffdd006c7fa0] secondary_start_kernel at ffffffa5f56a7fac > > PID: 0 TASK: ffffffdd00669f00 CPU: 2 CO Qiwu Qiwu QiwuMMAND: "swapper/2" > #0 [ffffffdd3fe6ca40] mrdump_stop_noncore_cpu at ffffffa5f6932a0c > #1 [ffffffdd3fe6cbc0] flush_smp_call_function_queue at ffffffa5f58b8684 > #2 [ffffffdd3fe6cc20] generic_smp_call_function_single_interrupt at > ffffffa5f58ba8c4 > #3 [ffffffdd3fe6cc30] handle_IPI at ffffffa5f56a8a10 > #4 [ffffffdd3fe6cca0] gic_handle_irq at ffffff Qiwu Qiwu Qiwua5f5682294 > --- <IRQ stack> --- > #5 [ffffffdd006cfde0] el1_irq at ffffffa5f56844e4 > PC: ffffffa5f7120b28 [cpuidle_enter_state+336] > LR: ffffffa5f7120d70 [cpuidle_enter_state+920] > SP: ffffffdd006cfdf0 PSTATE: 10c00145 > X29: ffffffdd006cfdf0 X28: 000000000000000 Qiwu Qiwu Qiwu1 X27: 0000000000000001 > X26: ffffffa5fa04e000 X25: 0000000000000001 X24: 0000378c5463ce04 > X23: ffffffa5f9970980 X22: 0000000000000001 X21: ffffffdcb6f69b00 > X20: ffffffa5fa04efa8 X19: 0000378c5621219f X18: 0000000000000000 > X17: 0000000000000000 X16: 0000000000000000 X15: 0000000000000000 > X14: ffffffdd00669f00 X13: 00000037475c4000 X12: 000000003455591d > X11: 0000000000000000 X10: 0000000000001000 X9: 0000000000000000 > X8: ffffff8ba00d9f6c X7: 0000000000000000 X6: 1ffffff4bf57ac5d > X5: 00209246a095bf14 X4: 0000348a8d795b93 X3: 431bde82d7b634db > X2: 1ffffffba00cd3e3 X1: 0000000000000000 X0: 0000000000000000 > #6 [ffffffdd006cfdf0] cpuidle_enter_state at ffffffa5f7120b24 > #7 [ffffffdd006cfe70] cpuidle_enter at ffffffa5f712150c > #8 [ffffffdd006cfe80] call_cpuidle at ffffffa5f5805e24 > #9 [ffffffdd006cfeb0] do_idle at ffffffa5f5806320 > #10 [ffffffdd006cff80] cpu_startup_entry at ffffffa5f580690c > #11 [ffffffdd006cffa0] secondary_start_kernel at ffffffa5f56a7fac > ....... > > Best regards, > Qiwu > > > > -----Original Message----- > From: Dave Anderson <anderson@xxxxxxxxxx> > Sent: Friday, December 13, 2019 4:38 AM > To: 陈启武 <chenqiwu@xxxxxxxxxx> > Cc: crash-utility@xxxxxxxxxx > Subject: Re: [External Mail]Re: [PATCH] Bugfix and optimization for ARM64 > getting crash notes > > > > ----- Original Message ----- > > Hi Dave, > > Above your suggestion, I made changes for patch v2. > > > > Best regards, > > Qiwu > > Sorry, but I'm going to NAK this patch in its current form. I'm not exactly > sure what you're trying to accomplish, but in my testing, it breaks things > unnecessarily. > > For example, moving the call to arm64_get_crash_notes() from POST_VM to > POST_INIT breaks this logic in task_init(): > > 645 else { > 646 if (KDUMP_DUMPFILE()) > 647 map_cpus_to_prstatus(); > 648 else if (ELF_NOTES_VALID() && DISKDUMP_DUMPFILE()) > 649 map_cpus_to_prstatus_kdump_cmprs(); > 650 please_wait("determining panic task"); > 651 set_context(get_panic_context(), NO_PID); > 652 please_wait_done(); > 653 } > > The get_panic_context() call requires that the ARM64 arm64_get_crash_notes() > has already been called, and that ms->panic_task_regs[] array has been > allocated and populated. But with your patch applied, it has not been > called yet, and therefore ms->page_task_regs is still NULL when > get_panic_context() is called above. > > As a result, for example, on a compressed kdump clone created by virsh dump, > I now see this during initialization: > > please wait... (determining panic task) > WARNING: cannot determine starting stack frame for task ffff000008c25280 > > WARNING: cannot determine starting stack frame for task ffff8000fa01b000 > > WARNING: cannot determine starting stack frame for task ffff8000fa01c000 > > WARNING: cannot determine starting stack frame for task ffff8000fa01d000 > > But all of those active tasks have NT_PRSTATUS notes and backtraces: > > crash> help -D | grep PC: > LR: ffff000008085938 SP: ffff000008be3ee0 PC: > ffff000008099cf8 > LR: ffff000008085938 SP: ffff8000fa06ff30 PC: > ffff000008099cf8 > LR: ffff000008085938 SP: ffff8000fa073f30 PC: > ffff000008099cf8 > LR: ffff000008085938 SP: ffff8000fa077f30 PC: > ffff000008099cf8 > crash> > > And therefore have legitimate starting stack frames: > > crash> bt ffff000008c25280 ffff8000fa01b000 ffff8000fa01c000 > ffff8000fa01d000 > PID: 0 TASK: ffff000008c25280 CPU: 0 COMMAND: "swapper/0" > #0 [ffff000008be3ee0] cpu_do_idle at ffff000008099cf4 > #1 [ffff000008be3f10] default_idle_call at ffff000008766f90 > #2 [ffff000008be3f20] cpu_startup_entry at ffff000008110fe4 > #3 [ffff000008be3f80] rest_init at ffff000008761674 > #4 [ffff000008be3f90] start_kernel at ffff000008ab0be8 > > PID: 0 TASK: ffff8000fa01b000 CPU: 1 COMMAND: "swapper/1" > #0 [ffff8000fa06ff30] cpu_do_idle at ffff000008099cf4 > #1 [ffff8000fa06ff60] default_idle_call at ffff000008766f90 > #2 [ffff8000fa06ff70] cpu_startup_entry at ffff000008110fe4 > #3 [ffff8000fa06ffd0] secondary_start_kernel at ffff00000808ecb8 > > PID: 0 TASK: ffff8000fa01c000 CPU: 2 COMMAND: "swapper/2" > #0 [ffff8000fa073f30] cpu_do_idle at ffff000008099cf4 > #1 [ffff8000fa073f60] default_idle_call at ffff000008766f90 > #2 [ffff8000fa073f70] cpu_startup_entry at ffff000008110fe4 > #3 [ffff8000fa073fd0] secondary_start_kernel at ffff00000808ecb8 > > PID: 0 TASK: ffff8000fa01d000 CPU: 3 COMMAND: "swapper/3" > #0 [ffff8000fa077f30] cpu_do_idle at ffff000008099cf4 > #1 [ffff8000fa077f60] default_idle_call at ffff000008766f90 > #2 [ffff8000fa077f70] cpu_startup_entry at ffff000008110fe4 > #3 [ffff8000fa077fd0] secondary_start_kernel at ffff00000808ecb8 > crash> > > Dave > > > > > > > -----Original Message----- > > From: Dave Anderson <anderson@xxxxxxxxxx> > > Sent: Wednesday, December 11, 2019 12:51 AM > > To: qiwuchen55@xxxxxxxxx > > Cc: crash-utility@xxxxxxxxxx; 陈启武 <chenqiwu@xxxxxxxxxx> > > Subject: [External Mail]Re: [PATCH] Bugfix and optimization for ARM64 > > getting crash notes > > > > > > > > ----- Original Message ----- > > > From: chenqiwu <chenqiwu@xxxxxxxxxx> > > > > > > 1) ARM64 call arm64_get_crash_notes() to retrieve active task > > > registers when POST_VM before calling map_cpus_to_prstatus() to > > > remap the NT_PRSTATUS elf notes to the online cpus. It's better to > > > call > > > arm64_get_crash_notes() when POST_INIT. > > > 2) arm64_get_crash_notes() check the sanity of NT_PRSTATUS notes > > > only for online cpus. If one cpu contains invalid note, it's better > > > to continue finding the crash notes for other online cpus. > > > So we can extract the backtraces for the online cpus which contain > > > valid note by using command "bt -a". > > > 3) map_cpus_to_prstatus() remap the NT_PRSTATUS notes only to the > > > online cpus. Make sure there must be a one-to-one relationship > > > between the number of online cpus and the number of notes. > > > > The code in map_cpus_to_prstatus() and > > map_cpus_to_prstatus_kdump_cmprs() > > has been in place forever. Both the nd->nt_prstatus_percpu[] and > > dd->nt_prstatus_percpu[] arrays are per-cpu regardless whether > > they are online or offline. However, since kdump only creates > > NT_PRSTATUS notes for on-line cpus, the "i" index is needed for each cpu, > > and the "j" > > index is needed for the existing NT_PRSTATUS notes. If a cpu is > > offline, its nt_prstatus_percpu[] entry will be zeroed out. > > > > I'm not arguing that the arm64 online-cpu handling may be suspect, but > > your patch should not be making changes to architectural-neutral code > > unless the issue affects all architectures. So please leave those two > > functions alone. > > > > Dave > > > > > > > > > > Signed-off-by: chenqiwu <chenqiwu@xxxxxxxxxx> > > > --- > > > arm64.c | 49 +++++++++++++++++++++++++++++-------------------- > > > diskdump.c | 9 +++------ > > > netdump.c | 4 ++-- > > > 3 files changed, 34 insertions(+), 28 deletions(-) > > > > > > diff --git a/arm64.c b/arm64.c > > > index 233029d..cbad461 100644 > > > --- a/arm64.c > > > +++ b/arm64.c > > > @@ -458,7 +458,7 @@ arm64_init(int when) > > > arm64_stackframe_init() > > > break; > > > > > > -case POST_VM: > > > +case POST_INIT: > > > /* > > > * crash_notes contains machine specific information about the > > > * crash. In particular, it contains CPU registers at the time @@ > > > -3587,7 +3587,7 @@ arm64_get_crash_notes(void) ulong offset; char > > > *buf, *p; ulong *notes_ptrs; -ulong i; > > > +ulong i, j; > > > > > > if (!symbol_exists("crash_notes")) > > > return FALSE; > > > @@ -3620,12 +3620,12 @@ arm64_get_crash_notes(void) if > > > (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct > > > arm64_pt_regs)))) > > > error(FATAL, "cannot calloc panic_task_regs space\n"); > > > > > > -for (i = 0; i < kt->cpus; i++) { > > > - > > > +for (i = 0, j = 0; i < kt->cpus; i++) { > > > if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf), > > > "note_buf_t", RETURN_ON_ERROR)) { -error(WARNING, "failed to > > > read note_buf_t\n"); -goto fail; > > > +error(WARNING, "cpu#%d: failed to read note_buf_t\n", i); > > > +++j; > > > +continue; > > > } > > > > > > /* > > > @@ -3655,19 +3655,29 @@ arm64_get_crash_notes(void) > > > note->n_descsz == notesz) > > > BCOPY((char *)note, buf, notesz); > > > } else { > > > -error(WARNING, > > > -"cannot find NT_PRSTATUS note for cpu: %d\n", i); > > > +if (CRASHDEBUG(1)) > > > +error(WARNING, > > > +"cpu#%d: cannot find NT_PRSTATUS note\n", i); > > > +++j; > > > continue; > > > } > > > } > > > > > > +/* > > > + * Check the sanity of NT_PRSTATUS note only for each online cpu. > > > + * If this cpu has invalid note, continue to find the crash notes > > > + * for other online cpus. > > > + */ > > > if (note->n_type != NT_PRSTATUS) { > > > -error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n"); -goto > > > fail; > > > +error(WARNING, "cpu#%d: invalid note (n_type != NT_PRSTATUS)\n", > > > +i); > > > +++j; > > > +continue; > > > } > > > -if (p[0] != 'C' || p[1] != 'O' || p[2] != 'R' || p[3] != 'E') { > > > -error(WARNING, "invalid note (name != \"CORE\"\n"); -goto fail; > > > + > > > +if (!STRNEQ(p, "CORE")) { > > > +error(WARNING, "cpu#%d: invalid note (name != \"CORE\")\n", i); > > > +++j; > > > +continue; > > > } > > > > > > /* > > > @@ -3684,14 +3694,13 @@ arm64_get_crash_notes(void) > > > > > > FREEBUF(buf); > > > FREEBUF(notes_ptrs); > > > -return TRUE; > > > > > > -fail: > > > -FREEBUF(buf); > > > -FREEBUF(notes_ptrs); > > > -free(ms->panic_task_regs); > > > -ms->panic_task_regs = NULL; > > > -return FALSE; > > > +if (j == kt->cpus) { > > > +free(ms->panic_task_regs); > > > +ms->panic_task_regs = NULL; > > > +return FALSE; > > > +} > > > +return TRUE; > > > } > > > > > > static void > > > diff --git a/diskdump.c b/diskdump.c index e88243e..12d8e9c 100644 > > > --- a/diskdump.c > > > +++ b/diskdump.c > > > @@ -130,12 +130,9 @@ map_cpus_to_prstatus_kdump_cmprs(void) > > > */ > > > nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS); > > > > > > -for (i = 0, j = 0; i < nrcpus; i++) { -if (in_cpu_map(ONLINE_MAP, > > > i)) { > > > -dd->nt_prstatus_percpu[i] = nt_ptr[j++]; num_prstatus_notes = > > > -MAX(dd->num_prstatus_notes, i+1); > > > -} > > > +for (i = 0; i < nrcpus; i++) { > > > +if (in_cpu_map(ONLINE_MAP, i)) > > > +dd->nt_prstatus_percpu[i] = nt_ptr[i]; > > > } > > > > > > FREEBUF(nt_ptr); > > > diff --git a/netdump.c b/netdump.c > > > index 406416a..849638a 100644 > > > --- a/netdump.c > > > +++ b/netdump.c > > > @@ -97,9 +97,9 @@ map_cpus_to_prstatus(void) > > > */ > > > nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS); > > > > > > -for (i = 0, j = 0; i < nrcpus; i++) { > > > +for (i = 0; i < nrcpus; i++) { > > > if (in_cpu_map(ONLINE_MAP, i)) > > > -nd->nt_prstatus_percpu[i] = nt_ptr[j++]; > > > +nd->nt_prstatus_percpu[i] = nt_ptr[i]; > > > } > > > > > > FREEBUF(nt_ptr); > > > -- > > > 1.9.1 > > > > > > > > > > #/******本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部 > > 或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件! > > This e-mail and its attachments contain confidential information from > > XIAOMI, which is intended only for the person or entity whose address > > is listed above. Any use of the information contained herein in any > > way (including, but not limited to, total or partial disclosure, > > reproduction, or dissemination) by persons other than the intended > > recipient(s) is prohibited. If you receive this e-mail in error, > > please notify the sender by phone or email immediately and delete > > it!******/# > > > > #/******本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件! > This e-mail and its attachments contain confidential information from > XIAOMI, which is intended only for the person or entity whose address is > listed above. Any use of the information contained herein in any way > (including, but not limited to, total or partial disclosure, reproduction, > or dissemination) by persons other than the intended recipient(s) is > prohibited. If you receive this e-mail in error, please notify the sender by > phone or email immediately and delete it!******/# >
diff --git a/arm64.c b/arm64.c index 233029d..1b024a4 100644 --- a/arm64.c +++ b/arm64.c @@ -81,7 +81,7 @@ static int arm64_on_irq_stack(int, ulong); static void arm64_set_irq_stack(struct bt_info *); static void arm64_set_process_stack(struct bt_info *); static int arm64_get_kvaddr_ranges(struct vaddr_range *); -static int arm64_get_crash_notes(void); +static void arm64_get_crash_notes(void); static void arm64_calc_VA_BITS(void); static int arm64_is_uvaddr(ulong, struct task_context *); @@ -465,11 +465,8 @@ arm64_init(int when) * of the crash. We need this information to extract correct * backtraces from the panic task. */ - if (!LIVE() && !arm64_get_crash_notes()) - error(WARNING, - "cannot retrieve registers for active task%s\n\n", - kt->cpus > 1 ? "s" : ""); - + if (!LIVE()) + arm64_get_crash_notes(); break; case LOG_ONLY: @@ -3578,7 +3575,7 @@ arm64_get_smp_cpus(void) /* * Retrieve task registers for the time of the crash. */ -static int +static void arm64_get_crash_notes(void) { struct machine_specific *ms = machdep->machspec; @@ -3587,10 +3584,10 @@ arm64_get_crash_notes(void) ulong offset; char *buf, *p; ulong *notes_ptrs; - ulong i; + ulong i, found; if (!symbol_exists("crash_notes")) - return FALSE; + return; crash_notes = symbol_value("crash_notes"); @@ -3602,9 +3599,9 @@ arm64_get_crash_notes(void) */ if (!readmem(crash_notes, KVADDR, ¬es_ptrs[kt->cpus-1], sizeof(notes_ptrs[kt->cpus-1]), "crash_notes", RETURN_ON_ERROR)) { - error(WARNING, "cannot read crash_notes\n"); + error(WARNING, "cannot read \"crash_notes\"\n"); FREEBUF(notes_ptrs); - return FALSE; + return; } if (symbol_exists("__per_cpu_offset")) { @@ -3620,12 +3617,11 @@ arm64_get_crash_notes(void) if (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct arm64_pt_regs)))) error(FATAL, "cannot calloc panic_task_regs space\n"); - for (i = 0; i < kt->cpus; i++) { - + for (i = found = 0; i < kt->cpus; i++) { if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf), "note_buf_t", RETURN_ON_ERROR)) { - error(WARNING, "failed to read note_buf_t\n"); - goto fail; + error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", i); + continue; } /* @@ -3655,19 +3651,24 @@ arm64_get_crash_notes(void) note->n_descsz == notesz) BCOPY((char *)note, buf, notesz); } else { - error(WARNING, - "cannot find NT_PRSTATUS note for cpu: %d\n", i); + error(WARNING, "cpu %d: cannot find NT_PRSTATUS note\n", i); continue; } } + /* + * Check the sanity of NT_PRSTATUS note only for each online cpu. + * If this cpu has invalid note, continue to find the crash notes + * for other online cpus. + */ if (note->n_type != NT_PRSTATUS) { - error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n"); - goto fail; + error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n", i); + continue; } - if (p[0] != 'C' || p[1] != 'O' || p[2] != 'R' || p[3] != 'E') { - error(WARNING, "invalid note (name != \"CORE\"\n"); - goto fail; + + if (!STRNEQ(p, "CORE")) { + error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name != \"CORE\")\n", i); + continue; } /* @@ -3680,18 +3681,17 @@ arm64_get_crash_notes(void) BCOPY(p + OFFSET(elf_prstatus_pr_reg), &ms->panic_task_regs[i], sizeof(struct arm64_pt_regs)); + + found++; } FREEBUF(buf); FREEBUF(notes_ptrs); - return TRUE; -fail: - FREEBUF(buf); - FREEBUF(notes_ptrs); - free(ms->panic_task_regs); - ms->panic_task_regs = NULL; - return FALSE; + if (!found) { + free(ms->panic_task_regs); + ms->panic_task_regs = NULL; + } } static void
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility