Hi Takahiro, Welcome back! It's great to get you back in the fold again... I just ran a quick test of your patch on a set of sample dumpfiles, and only found one issue. It was on a 4.2 QEMU dump where all of the active tasks were running in user-space, and all of their "bt" commands fail immediately with a "zero-size" allocation message. As it turns out, that was due to this GETBUF() call in arm64_in_kdump_text_on_irq_stack(): static int arm64_in_kdump_text_on_irq_stack(struct bt_info *bt) { int cpu; ulong stackbase; char *stackbuf; ulong *ptr, *start, *base; struct machine_specific *ms; if ((machdep->flags & (IRQ_STACKS|KDUMP_ENABLED)) != (IRQ_STACKS|KDUMP_ENABLED)) return FALSE; ms = machdep->machspec; cpu = bt->tc->processor; stackbase = ms->irq_stacks[cpu]; stackbuf = GETBUF(ms->irq_stack_size); <=== ... ms->irq_stack_size is zero because that kernel version does not have IRQ stacks. As it turns out, the problem is that your reworked arm64_irq_stack_init() is setting the IRQ_STACKS flag unconditionally at the bottom of the function. Moving the flag setting up into the two if-else segments fixes it. I'll check out the patches in detail next week, but this looks good. Thanks, Dave ----- Original Message ----- > Dave, > > On Fri, Sep 22, 2017 at 03:06:00PM -0400, Dave Anderson wrote: > > > > Jan, > > > > I went back to creating a machdep->machspec->user_eframe_offset value > > to be able to account for both the 4.7 and the upcoming 4.14 pt_regs > > changes: > > > > https://github.com/crash-utility/crash/commit/c975008e61121ef8785622c3bc26964da8fe0deb > > > > Again, though, note that "bt" does not work with 4.14. > > Even with your latest changes in 7.2.0, "bt" still has some issues: > a.register dump at exception frame doesn't have correct values > (due to added stackframe[] in pt_regs) > b. tracing irq stack to process stack fails > (due to irq-stack implementation changes and VMAP_STACK) > c."bt -o" seems to have been broken for a while > > Attached is my tentative patch, which hopefully addresses (a) and (b). > While it is still far from perfect, it may help give you a heads-up. > > Thanks, > -Takahiro AKASHI > > ===8<=== > >From 156ec115b2a436a0738908153d676f8eeed84cb1 Mon Sep 17 00:00:00 2001 > From: AKASHI Takahiro <takahiro.akashi@xxxxxxxxxx> > Date: Thu, 12 Oct 2017 10:46:34 +0900 > Subject: [PATCH] arm64: backtrace for v4.14 > > --- > arm64.c | 172 > ++++++++++++++++++++++++++++++++++++++++++++++++++-------------- > 1 file changed, 136 insertions(+), 36 deletions(-) > > diff --git a/arm64.c b/arm64.c > index 20c5d34..22c8556 100644 > --- a/arm64.c > +++ b/arm64.c > @@ -72,6 +72,7 @@ static void arm64_cmd_mach(void); > static void arm64_display_machine_stats(void); > static int arm64_get_smp_cpus(void); > static void arm64_clear_machdep_cache(void); > +static int arm64_on_process_stack(struct bt_info *, ulong); > static int arm64_in_alternate_stack(int, ulong); > static int arm64_on_irq_stack(int, ulong); > static void arm64_set_irq_stack(struct bt_info *); > @@ -1336,29 +1337,60 @@ arm64_irq_stack_init(void) > req = &request; > struct machine_specific *ms = machdep->machspec; > > - if (!symbol_exists("irq_stack") || > - !(sp = per_cpu_symbol_search("irq_stack")) || > - !get_symbol_type("irq_stack", NULL, req) || > - (req->typecode != TYPE_CODE_ARRAY) || > - (req->target_typecode != TYPE_CODE_INT)) > - return; > + if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus > + * sizeof(ulong))))) > + error(FATAL, "cannot malloc irq_stack addresses\n"); > > - if (CRASHDEBUG(1)) { > - fprintf(fp, "irq_stack: \n"); > - fprintf(fp, " type: %s\n", > - (req->typecode == TYPE_CODE_ARRAY) ? "TYPE_CODE_ARRAY" : "other"); > - fprintf(fp, " target_typecode: %s\n", > - req->target_typecode == TYPE_CODE_INT ? "TYPE_CODE_INT" : "other"); > - fprintf(fp, " target_length: %ld\n", req->target_length); > - fprintf(fp, " length: %ld\n", req->length); > - } > + if (symbol_exists("irq_stack") && > + (sp = per_cpu_symbol_search("irq_stack")) && > + get_symbol_type("irq_stack", NULL, req)) { > + /* before v4.14 or CONFIG_VMAP_STACK disabled */ > + if (CRASHDEBUG(1)) { > + fprintf(fp, "irq_stack: \n"); > + fprintf(fp, " type: %s\n", > + (req->typecode == TYPE_CODE_ARRAY) ? > + "TYPE_CODE_ARRAY" : "other"); > + fprintf(fp, " target_typecode: %s\n", > + req->target_typecode == TYPE_CODE_INT ? > + "TYPE_CODE_INT" : "other"); > + fprintf(fp, " target_length: %ld\n", > + req->target_length); > + fprintf(fp, " length: %ld\n", req->length); > + } > > - ms->irq_stack_size = req->length; > - if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * > sizeof(ulong))))) > - error(FATAL, "cannot malloc irq_stack addresses\n"); > + ms->irq_stack_size = req->length; > + > + for (i = 0; i < kt->cpus; i++) > + ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value; > + } else if (symbol_exists("irq_stack_ptr") && > + (sp = per_cpu_symbol_search("irq_stack_ptr")) && > + get_symbol_type("irq_stack_ptr", NULL, req)) { > + /* v4.14 and later with CONFIG_VMAP_STACK enabled */ > + if (CRASHDEBUG(1)) { > + fprintf(fp, "irq_stack_ptr: \n"); > + fprintf(fp, " type: %x, %s\n", > + (int)req->typecode, > + (req->typecode == TYPE_CODE_PTR) ? > + "TYPE_CODE_PTR" : "other"); > + fprintf(fp, " target_typecode: %x, %s\n", > + (int)req->target_typecode, > + req->target_typecode == TYPE_CODE_INT ? > + "TYPE_CODE_INT" : "other"); > + fprintf(fp, " target_length: %ld\n", > + req->target_length); > + fprintf(fp, " length: %ld\n", req->length); > + } > + > + ms->irq_stack_size = 16384; > + > + for (i = 0; i < kt->cpus; i++) { > + ulong p; > > - for (i = 0; i < kt->cpus; i++) > - ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value; > + p = kt->__per_cpu_offset[i] + sp->value; > + readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong), > + "IRQ stack pointer", RETURN_ON_ERROR); > + } > + } > > machdep->flags |= IRQ_STACKS; > } > @@ -1750,11 +1782,20 @@ arm64_display_full_frame(struct bt_info *bt, ulong > sp) > if (bt->frameptr == sp) > return; > > - if (!INSTACK(sp, bt) || !INSTACK(bt->frameptr, bt)) { > - if (sp == 0) > - sp = bt->stacktop - USER_EFRAME_OFFSET; > - else > - return; > + if (INSTACK(bt->frameptr, bt)) { > + if (INSTACK(sp, bt)) { > + /* normal case */; > + } else { > + if (sp == 0) > + /* interrupt in user mode */ > + sp = bt->stacktop - USER_EFRAME_OFFSET; > + else > + /* interrupt in kernel mode */ > + sp = bt->stacktop; > + } > + } else { > + error(WARNING, "full display ?\n"); > + return; > } > > words = (sp - bt->frameptr) / sizeof(ulong); > @@ -1873,6 +1914,25 @@ arm64_unwind_frame(struct bt_info *bt, struct > arm64_stackframe *frame) > * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on > process stack) > */ > if (machdep->flags & IRQ_STACKS) { > + if (machdep->flags & UNW_4_14) { > + if ((bt->flags & BT_IRQSTACK) && > + !arm64_on_irq_stack(bt->tc->processor, frame->fp)) { > + if (arm64_on_process_stack(bt, frame->fp)) { > + arm64_set_process_stack(bt); > + > + frame->sp = frame->fp - SIZE(pt_regs) + 16; > + /* for switch_stack */ > + /* fp still points to irq stack */ > + bt->bptr = fp; > + /* for display_full_frame */ > + /* sp points to process stack */ > + bt->frameptr = frame->sp; > + } else { > + /* irq -> user */ > + return FALSE; > + } > + } > + } else { /* !UNW_4_14 */ > ms = machdep->machspec; > irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - > 16; > > @@ -1896,6 +1956,7 @@ arm64_unwind_frame(struct bt_info *bt, struct > arm64_stackframe *frame) > return FALSE; > } > } > + } /* UNW_4_14 */ > } > > return TRUE; > @@ -2086,10 +2147,17 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct > arm64_stackframe *frame, > * We are on process stack. Just add a faked frame > */ > > - if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) > - frame->sp = ext_frame.fp > - - sizeof(struct arm64_pt_regs); > - else { > + if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) { > + if (MEMBER_EXISTS("pt_regs", "stackframe")) { > + frame->sp = ext_frame.fp > + - sizeof(struct arm64_pt_regs) - 16; > + frame->fp = ext_frame.fp; > + } else { > + frame->sp = ext_frame.fp > + - sizeof(struct arm64_pt_regs); > + frame->fp = frame->sp; > + } > + } else { > /* > * FIXME: very exceptional case > * We are already back on process stack, but > @@ -2109,10 +2177,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct > arm64_stackframe *frame, > * Really ugly > */ > frame->sp = frame->fp + 0x20; > + frame->fp = frame->sp; > fprintf(ofp, " (Next exception frame might be wrong)\n"); > } > > - frame->fp = frame->sp; > } else { > /* We are on IRQ stack */ > > @@ -2122,9 +2190,15 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct > arm64_stackframe *frame, > if (ext_frame.fp != irq_stack_ptr) { > /* (2) Just add a faked frame */ > > - frame->sp = ext_frame.fp > - - sizeof(struct arm64_pt_regs); > - frame->fp = frame->sp; > + if (MEMBER_EXISTS("pt_regs", "stackframe")) { > + frame->sp = ext_frame.fp > + - sizeof(struct arm64_pt_regs); > + frame->fp = ext_frame.fp; > + } else { > + frame->sp = ext_frame.fp > + - sizeof(struct arm64_pt_regs) - 16; > + frame->fp = frame->sp; > + } > } else { > /* > * (3) > @@ -2304,11 +2378,19 @@ arm64_back_trace_cmd(struct bt_info *bt) > if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) { > if (!(bt->flags & BT_IRQSTACK) || > (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) > - exception_frame = stackframe.fp - SIZE(pt_regs); > + { > + if (MEMBER_EXISTS("pt_regs", "stackframe")) > + /* v4.14 or later */ > + exception_frame = stackframe.fp > + - SIZE(pt_regs) + 16; > + else > + exception_frame = stackframe.fp > + - SIZE(pt_regs); > + } > } > > if ((bt->flags & BT_IRQSTACK) && > - !arm64_on_irq_stack(bt->tc->processor, stackframe.sp)) { > + !arm64_on_irq_stack(bt->tc->processor, stackframe.fp)) { > bt->flags &= ~BT_IRQSTACK; > if (arm64_switch_stack(bt, &stackframe, ofp) == USER_MODE) > break; > @@ -2424,6 +2506,8 @@ user_space: > * otherwise show an exception frame. > * Since exception entry code doesn't have a real > * stackframe, we fake a dummy frame here. > + * Note: Since we have a real stack frame in pt_regs, > + * We no longer need a dummy frame on v4.14 or later. > */ > if (!arm64_in_exp_entry(stackframe.pc)) > continue; > @@ -2669,7 +2753,9 @@ arm64_switch_stack(struct bt_info *bt, struct > arm64_stackframe *frame, FILE *ofp > if (frame->fp == 0) > return USER_MODE; > > - arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); > + if (!(machdep->flags & UNW_4_14)) > + arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); > + > return KERNEL_MODE; > } > > @@ -3362,6 +3448,20 @@ arm64_clear_machdep_cache(void) { > return; > } > > +static int > +arm64_on_process_stack(struct bt_info *bt, ulong stkptr) > +{ > + ulong stackbase, stacktop; > + > + stackbase = GET_STACKBASE(bt->task); > + stacktop = GET_STACKTOP(bt->task); > + > + if ((stkptr >= stackbase) && (stkptr < stacktop)) > + return TRUE; > + > + return FALSE; > +} > + > static int > arm64_on_irq_stack(int cpu, ulong stkptr) > { > -- > 2.14.1 > > -- > Crash-utility mailing list > Crash-utility@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/crash-utility > -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility