On 2022/06/29 4:19, Hari Bathini wrote: > A CPU could be in an emergency stack when it is running in real mode > or any special scenario like TM bad thing. Also, there are dedicated > emergency stacks for machine check and system reset interrupt. Right > now, no backtrace is provided if a CPU is in any of these stacks. > This change ensures backtrace is processed appropriately even when > a CPU is in any one of these emergency stacks. Also, if stack info > cannot be found, print that message always instead of only when > verbose logs are enabled. > > Signed-off-by: Hari Bathini <hbathini@xxxxxxxxxxxxx> > --- > defs.h | 12 ++++ > ppc64.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- > 2 files changed, 196 insertions(+), 12 deletions(-) > > diff --git a/defs.h b/defs.h > index d1d3ea9..9b1b69a 100644 > --- a/defs.h > +++ b/defs.h > @@ -6288,6 +6288,13 @@ struct ppc64_elf_prstatus { > > #ifdef PPC64 > > +enum emergency_stack_type { > + NONE_STACK = 0, > + EMERGENCY_STACK, > + NMI_EMERGENCY_STACK, > + MC_EMERGENCY_STACK > +}; > + > struct ppc64_opal { > uint64_t base; > uint64_t entry; > @@ -6307,6 +6314,11 @@ struct machine_specific { > char *hwstackbuf; > uint hwstacksize; > > + /* Emergency stacks */ > + ulong *emergency_sp; > + ulong *nmi_emergency_sp; > + ulong *mc_emergency_sp; > + > uint l4_index_size; > uint l3_index_size; > uint l2_index_size; > diff --git a/ppc64.c b/ppc64.c > index 5d4c951..333a775 100644 > --- a/ppc64.c > +++ b/ppc64.c > @@ -48,6 +48,10 @@ static ulong ppc64_get_stackbase(ulong); > static ulong ppc64_get_stacktop(ulong); > void ppc64_compiler_warning_stub(void); > static ulong ppc64_in_irqstack(ulong); > +static enum emergency_stack_type ppc64_in_emergency_stack(int cpu, ulong addr, > + bool verbose); > +static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type, > + struct bt_info *bt); > static char * ppc64_check_eframe(struct ppc64_pt_regs *); > static void ppc64_print_eframe(char *, struct ppc64_pt_regs *, 
> struct bt_info *); > @@ -56,6 +60,7 @@ static int ppc64_paca_percpu_offset_init(int); > static void ppc64_init_cpu_info(void); > static int ppc64_get_cpu_map(void); > static void ppc64_clear_machdep_cache(void); > +static void ppc64_init_paca_info(void); > static void ppc64_vmemmap_init(void); > static int ppc64_get_kvaddr_ranges(struct vaddr_range *); > static uint get_ptetype(ulong pte); > @@ -692,6 +697,8 @@ ppc64_init(int when) > error(FATAL, "cannot malloc hwirqstack buffer space."); > } > > + ppc64_init_paca_info(); > + > if (!machdep->hz) { > machdep->hz = HZ; > if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) > @@ -1203,6 +1210,63 @@ ppc64_kvtop(struct task_context *tc, ulong kvaddr, > return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); > } > > +static void > +ppc64_init_paca_info(void) > +{ > + struct machine_specific *ms = machdep->machspec; > + ulong paca_ptr[kt->cpus]; Please don't use a variable-length array here; it might be fragile. Thanks, Kazu > + int i; > + > + /* Get paca pointers for all CPUs. */ > + if (symbol_exists("paca_ptrs")) { > + ulong paca_loc; > + > + readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc, sizeof(void *), > + "paca double pointer", FAULT_ON_ERROR); > + readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) * kt->cpus, > + "paca pointers", FAULT_ON_ERROR); > + } else if (symbol_exists("paca") && > + (get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR)) { > + readmem(symbol_value("paca"), KVADDR, paca_ptr, sizeof(void *) * kt->cpus, > + "paca pointers", FAULT_ON_ERROR); > + } else > + return; > + > + /* Initialize emergency stacks info. 
*/ > + if (MEMBER_EXISTS("paca_struct", "emergency_sp")) { > + ulong offset = MEMBER_OFFSET("paca_struct", "emergency_sp"); > + > + if (!(ms->emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong)))) > + error(FATAL, "cannot malloc emergency stack space."); > + for (i = 0; i < kt->cpus; i++) > + readmem(paca_ptr[i] + offset, KVADDR, &ms->emergency_sp[i], > + sizeof(void *), "paca->emergency_sp", > + FAULT_ON_ERROR); > + } > + > + if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) { > + ulong offset = MEMBER_OFFSET("paca_struct", "nmi_emergency_sp"); > + > + if (!(ms->nmi_emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong)))) > + error(FATAL, "cannot malloc NMI emergency stack space."); > + for (i = 0; i < kt->cpus; i++) > + readmem(paca_ptr[i] + offset, KVADDR, &ms->nmi_emergency_sp[i], > + sizeof(void *), "paca->nmi_emergency_sp", > + FAULT_ON_ERROR); > + } > + > + if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) { > + ulong offset = MEMBER_OFFSET("paca_struct", "mc_emergency_sp"); > + > + if (!(ms->mc_emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong)))) > + error(FATAL, "cannot malloc machine check emergency stack space."); > + for (i = 0; i < kt->cpus; i++) > + readmem(paca_ptr[i] + offset, KVADDR, &ms->mc_emergency_sp[i], > + sizeof(void *), "paca->mc_emergency_sp", > + FAULT_ON_ERROR); > + } > +} > + > /* > * Verify that the kernel has made the vmemmap list available, > * and if so, stash the relevant data required to make vtop > @@ -1754,6 +1818,11 @@ ppc64_eframe_search(struct bt_info *bt_in) > addr = bt->stackbase + > roundup(SIZE(thread_info), sizeof(ulong)); > } else if (!INSTACK(addr, bt)) { > + enum emergency_stack_type estype; > + > + if ((estype = ppc64_in_emergency_stack(bt->tc->processor, addr, false))) > + ppc64_set_bt_emergency_stack(estype, bt); > + > /* > * If the user specified SP is in HW interrupt stack > * (only for tasks running on other CPUs and in 2.4 > @@ -1855,6 +1924,84 @@ ppc64_in_irqstack(ulong addr) > return 0; > } > 
> +/* > + * Check if the CPU is running in any of its emergency stacks. > + * Returns > + * NONE_STACK : if input is invalid or addr is not within any emergency stack. > + * EMERGENCY_STACK : if the addr is within emergency stack. > + * NMI_EMERGENCY_STACK : if the addr is within NMI emergency stack. > + * MC_EMERGENCY_STACK : if the addr is within machine check emergency stack. > + */ > +static enum emergency_stack_type > +ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose) > +{ > + struct machine_specific *ms = machdep->machspec; > + ulong base, top; > + > + if (cpu < 0 || cpu >= kt->cpus) > + return NONE_STACK; > + > + if (ms->emergency_sp) { > + top = ms->emergency_sp[cpu]; > + base = top - STACKSIZE(); > + if (addr >= base && addr < top) { > + if (verbose) > + fprintf(fp, "---<Emergency Stack>---\n"); > + return EMERGENCY_STACK; > + } > + } > + > + if (ms->nmi_emergency_sp) { > + top = ms->nmi_emergency_sp[cpu]; > + base = top - STACKSIZE(); > + if (addr >= base && addr < top) { > + if (verbose) > + fprintf(fp, "---<NMI Emergency Stack>---\n"); > + return NMI_EMERGENCY_STACK; > + } > + } > + > + if (ms->mc_emergency_sp) { > + top = ms->mc_emergency_sp[cpu]; > + base = top - STACKSIZE(); > + if (addr >= base && addr < top) { > + if (verbose) > + fprintf(fp, "---<Machine Check Emergency Stack>---\n"); > + return MC_EMERGENCY_STACK; > + } > + } > + > + return NONE_STACK; > +} > + > +static void > +ppc64_set_bt_emergency_stack(enum emergency_stack_type type, struct bt_info *bt) > +{ > + struct machine_specific *ms = machdep->machspec; > + ulong top; > + > + switch (type) { > + case EMERGENCY_STACK: > + top = ms->emergency_sp[bt->tc->processor]; > + break; > + case NMI_EMERGENCY_STACK: > + top = ms->nmi_emergency_sp[bt->tc->processor]; > + break; > + case MC_EMERGENCY_STACK: > + top = ms->mc_emergency_sp[bt->tc->processor]; > + break; > + default: > + top = 0; > + break; > + } > + > + if (top) { > + bt->stackbase = top - STACKSIZE(); > + bt->stacktop = top; 
> + alter_stackbuf(bt); > + } > +} > + > /* > * Unroll a kernel stack. > */ > @@ -1935,10 +2082,13 @@ ppc64_back_trace_cmd(struct bt_info *bt) > static void > ppc64_back_trace(struct gnu_request *req, struct bt_info *bt) > { > - int frame = 0; > - ulong lr = 0; /* hack...need to pass in initial lr reg */ > + enum emergency_stack_type estype; > ulong newpc = 0, newsp, marker; > + int c = bt->tc->processor; > + ulong nmi_sp = 0; > int eframe_found; > + int frame = 0; > + ulong lr = 0; /* hack...need to pass in initial lr reg */ > > if (!INSTACK(req->sp, bt)) { > ulong irqstack; > @@ -1948,6 +2098,10 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt) > bt->stackbase = irqstack; > bt->stacktop = bt->stackbase + STACKSIZE(); > alter_stackbuf(bt); > + } else if ((estype = ppc64_in_emergency_stack(c, req->sp, true))) { > + if (estype == NMI_EMERGENCY_STACK) > + nmi_sp = req->sp; > + ppc64_set_bt_emergency_stack(estype, bt); > } else if (ms->hwintrstack) { > bt->stacktop = ms->hwintrstack[bt->tc->processor] + > sizeof(ulong); > @@ -1956,9 +2110,7 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt) > bt->stackbuf = ms->hwstackbuf; > alter_stackbuf(bt); > } else { > - if (CRASHDEBUG(1)) { > - fprintf(fp, "cannot find the stack info.\n"); > - } > + fprintf(fp, "cannot find the stack info.\n"); > return; > } > } > @@ -1988,13 +2140,20 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt) > newsp = > *(ulong *)&bt->stackbuf[newsp - bt->stackbase]; > if (!INSTACK(newsp, bt)) { > - /* > - * Switch HW interrupt stack to process's stack. > - */ > - bt->stackbase = GET_STACKBASE(bt->task); > - bt->stacktop = GET_STACKTOP(bt->task); > - alter_stackbuf(bt); > - } > + if ((estype = ppc64_in_emergency_stack(c, newsp, true))) { > + if (!nmi_sp && estype == NMI_EMERGENCY_STACK) > + nmi_sp = newsp; > + ppc64_set_bt_emergency_stack(estype, bt); > + } else { > + /* > + * Switch HW interrupt stack or emergency stack > + * to process's stack. 
> + */ > + bt->stackbase = GET_STACKBASE(bt->task); > + bt->stacktop = GET_STACKTOP(bt->task); > + alter_stackbuf(bt); > + } > + } > if (IS_KVADDR(newsp) && INSTACK(newsp, bt)) > newpc = *(ulong *)&bt->stackbuf[newsp + 16 - > bt->stackbase]; > @@ -2038,6 +2197,16 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt) > } > } > > + /* > + * NMI stack may not be re-entrant. If so, an SP in the NMI stack > + * is likely to point back to an SP within the NMI stack, in case > + * of a nested NMI. > + */ > + if (nmi_sp && nmi_sp == newsp) { > + fprintf(fp, "---<Nested NMI>---\n"); > + break; > + } > + > /* > * Some Linux 3.7 kernel threads have been seen to have > * their end-of-trace stack linkage pointer pointing > @@ -2415,6 +2584,9 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp) > pt_regs = (struct ppc64_pt_regs *)bt->machdep; > ur_nip = pt_regs->nip; > ur_ksp = pt_regs->gpr[1]; > + /* Print the collected regs for panic task. */ > + ppc64_print_regs(pt_regs); > + ppc64_print_nip_lr(pt_regs, 1); > } else if ((pc->flags & KDUMP) || > ((pc->flags & DISKDUMP) && > (*diskdump_flags & KDUMP_CMPRS_LOCAL))) { -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility Contribution Guidelines: https://github.com/crash-utility/crash/wiki