On Mon, Oct 16, 2017 at 03:33:45PM -0400, Dave Anderson wrote: > > Hi Takahiro, > > One question about a segment of your patch that I can't test because > I don't have a 4.14 dumpfile. Here in arm64_switch_stack(), you have > conditionalized the display of the exception frame: > > @@ -2669,7 +2753,9 @@ arm64_switch_stack(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp > if (frame->fp == 0) > return USER_MODE; > > - arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); > + if (!(machdep->flags & UNW_4_14)) > + arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); > + > return KERNEL_MODE; > } > > I don't understand -- what happens with a UNW_4_14 dumpfile? One of the changes made in v4.14 around entry.S is to add a "real stackframe" to the exception entry code. So the arm64_print_exception_frame() call in the main loop of arm64_back_trace_cmd() should work the same way for irq and other exceptions. So I would say that the call in arm64_switch_stack() is "redundant" on v4.14. === for your reference, when irq takes place, === before calling irq handler(gic_handle_irq), the process stack looks like: sp +--------+ | saved | | regs | | ... | fp +- - - - + |prev fp | |prev lr | +--------+ <--- interrupted stackframe | | | | and after entering irq handler, the irq stack looks like: sp | | fp +--------+ |prev fp | |prev lr | | ...
| +--------+ <--- top of irq stack =========================== A bonus from v4.14 changes is that we are now able to observe a "hidden" stackframe under exception frame, which can only appear with "bt -o" if it works :) === before v4.14, no frame for sys_handle_crash === crash-arm64> bt PID: 1189 TASK: ffff80000d1bc100 CPU: 1 COMMAND: "sh" #0 [ffff80000cd07a30] crash_kexec at ffff0000081423a4 #1 [ffff80000cd07a60] die at ffff000008088850 #2 [ffff80000cd07aa0] __do_kernel_fault at ffff000008099900 #3 [ffff80000cd07ad0] do_page_fault at ffff0000089238e0 #4 [ffff80000cd07b40] do_translation_fault at ffff000008923920 #5 [ffff80000cd07b50] do_mem_abort at ffff000008081354 #6 [ffff80000cd07d30] el1_ia at ffff000008082600 PC: ffff000008514060 [sysrq_handle_crash+32] LR: ffff00000851404c [sysrq_handle_crash+12] SP: ffff80000cd07d30 PSTATE: 60000145 X29: ffff80000cd07d30 X28: ffff80000d1bc100 X27: ffff000008932000 X26: 0000000000000040 X25: 0000000000000123 X24: 0000000000000000 X23: 0000000000000009 X22: ffff000008ede000 X21: ffff000008ede870 X20: 0000000000000063 X19: ffff000008e51000 X18: 0000000000000010 X17: 0000ffff98268e90 X16: ffff000008200000 X15: ffffffffffffffff X14: ffff000088f2cf77 X13: ffff000008f2cf85 X12: ffff000008e39e08 X11: ffff000008536220 X10: ffff80000cd07a30 X9: 0000000000000016 X8: 6120726567676972 X7: 54203a2071527379 X6: 000000000000011a X5: 0000000000000000 X4: 0000000000000000 X3: 0000000000000000 X2: ffff80001ff60960 X1: 0000000000000000 X0: 0000000000000001 #7 [ffff80000cd07d40] __handle_sysrq at ffff00000851467c #8 [ffff80000cd07d80] write_sysrq_trigger at ffff000008514c04 #9 [ffff80000cd07da0] proc_reg_write at ffff000008266910 #10 [ffff80000cd07dc0] __vfs_write at ffff0000081fd6c4 #11 [ffff80000cd07e40] vfs_write at ffff0000081feab0 #12 [ffff80000cd07e80] sys_write at ffff000008200044 #13 [ffff80000cd07ec0] el0_svc_naked at ffff000008082f2c PC: 0000ffff98268e78 LR: 000000000040a074 SP: 0000ffffff8ad7f0 X29: 0000ffffff8ae1d0 X28: 
000000000047d2c0 X27: 0000000000000000 X26: 0000000005ac2440 X25: 0000000000000000 X24: 0000000000000020 X23: 0000000005ac55e0 X22: 0000000000000002 X21: 0000000005ac55e0 X20: 0000000000000001 X19: 00000000004ae000 X18: 0000ffffff8ac758 X17: 0000ffff98268e90 X16: 0000000000000000 X15: 0000000000000008 X14: 0000ffff98218a24 X13: 0000ffffff8ad8a0 X12: 0101010101010101 X11: 0101010101010101 X10: 0101010101010101 X9: fffffffffffffff0 X8: 0000000000000040 X7: 0000000000000000 X6: 0080000080808080 X5: 0000000000000000 X4: 0000000005ac0063 X3: 0000000005ac55e1 X2: 0000000000000002 X1: 0000000005ac55e0 X0: 0000000000000001 ORIG_X0: 0000000000000001 SYSCALLNO: 40 PSTATE: 80000000 ==== v4.14 or later, see #7 === crash-arm64> bt PID: 1326 TASK: ffff80000d260e00 CPU: 1 COMMAND: "sh" #0 [ffff00000b413a20] crash_kexec at ffff00000814c010 #1 [ffff00000b413a50] die at ffff000008089088 #2 [ffff00000b413a90] __do_kernel_fault at ffff000008097ac0 #3 [ffff00000b413ac0] do_page_fault at ffff000008097bdc #4 [ffff00000b413b30] do_translation_fault at ffff000008097f60 #5 [ffff00000b413b40] do_mem_abort at ffff0000080812ac #6 [ffff00000b413d20] el1_ia at ffff000008082f10 PC: ffff000008544330 [sysrq_handle_crash+32] LR: ffff00000854431c [sysrq_handle_crash+12] SP: ffff00000b413d30 PSTATE: 60000145 X29: ffff00000b413d30 X28: ffff80000d260e00 X27: ffff0000089c1000 X26: 0000000000000040 X25: 0000000000000124 X24: 0000000000000000 X23: 0000000000000009 X22: ffff000008fda000 X21: ffff000008fdaa48 X20: 0000000000000063 X19: ffff000008f33000 X18: 0000000000000010 X17: 0000ffffa1c93e90 X16: ffff00000820b370 X15: ffffffffffffffff X14: ffff00008903c37f X13: ffff00000903c38d X12: ffff000008f19df0 X11: ffff000008566d30 X10: ffff00000b413a50 X9: 0000000000000015 X8: 54203a2071527379 X7: 53203a7172737973 X6: 000000000000010e X5: 0000000000000000 X4: 0000000000000000 X3: 0000000000000000 X2: ffff80000ef627f0 X1: 0000000000000000 X0: 0000000000000001 #7 [ffff00000b413d30] sysrq_handle_crash at 
ffff00000854432c #8 [ffff00000b413d40] __handle_sysrq at ffff00000854494c #9 [ffff00000b413d80] write_sysrq_trigger at ffff000008544ed4 #10 [ffff00000b413da0] proc_reg_write at ffff00000827273c #11 [ffff00000b413dc0] __vfs_write at ffff00000820ae34 #12 [ffff00000b413e40] vfs_write at ffff00000820b108 #13 [ffff00000b413e80] sys_write at ffff00000820b3b4 #14 [ffff00000b413ff0] el0_svc_naked at ffff0000080837ac PC: 0000ffffa1c93e78 LR: 000000000040a074 SP: 0000ffffd2703e60 X29: 0000ffffd2704840 X28: 000000000047d2c0 X27: 0000000000000000 X26: 000000003c71a440 X25: 0000000000000000 X24: 0000000000000020 X23: 000000003c71cf90 X22: 0000000000000002 X21: 000000003c71cf90 X20: 0000000000000001 X19: 00000000004ae000 X18: 0000ffffd2702dc8 X17: 0000ffffa1c93e90 X16: 0000000000000000 X15: 0000000000000008 X14: 0000ffffa1c43a24 X13: 0000ffffd2703f10 X12: 0101010101010101 X11: 0101010101010101 X10: 0101010101010101 X9: fffffffffffffff0 X8: 0000000000000040 X7: 0000000000000000 X6: 0080808080808080 X5: 0000000000000000 X4: 0000000000000063 X3: 000000003c71cf91 X2: 0000000000000002 X1: 000000003c71cf90 X0: 0000000000000001 ORIG_X0: 0000000000000001 SYSCALLNO: 40 PSTATE: 80000000 =========================== So I'm thinking of disabling "bt -o" for v4.14 or later since "bt -o" may partly lose its "raison d'être." -Takahiro AKASHI > Thanks, > Dave > > > > > ----- Original Message ----- > > > > Hi Takahiro, > > > > Welcome back! It's great to get you back in the fold again... > > > > I just ran a quick test of your patch on a set of sample dumpfiles, > > and only found one issue. It was on a 4.2 QEMU dump where all of the > > active tasks were running in user-space, and all of their "bt" commands > > fail immediately with a "zero-size" allocation message.
As it turns out, > > that was due to this GETBUF() call in arm64_in_kdump_text_on_irq_stack(): > > > > static int > > arm64_in_kdump_text_on_irq_stack(struct bt_info *bt) > > { > > int cpu; > > ulong stackbase; > > char *stackbuf; > > ulong *ptr, *start, *base; > > struct machine_specific *ms; > > > > if ((machdep->flags & (IRQ_STACKS|KDUMP_ENABLED)) != > > (IRQ_STACKS|KDUMP_ENABLED)) > > return FALSE; > > > > ms = machdep->machspec; > > cpu = bt->tc->processor; > > stackbase = ms->irq_stacks[cpu]; > > stackbuf = GETBUF(ms->irq_stack_size); <=== > > ... > > > > ms->irq_stack_size is zero because that kernel version does not have IRQ > > stacks. > > As it turns out, the problem is that your reworked arm64_irq_stack_init() is > > setting > > the IRQ_STACKS flag unconditionally at the bottom of the function. Moving > > the > > flag setting up into the two if-else segments fixes it. > > > > I'll check out the patches in detail next week, but this looks good. > > > > Thanks, > > Dave > > > > > > > > > > > > > > > > > > > > ----- Original Message ----- > > > Dave, > > > > > > On Fri, Sep 22, 2017 at 03:06:00PM -0400, Dave Anderson wrote: > > > > > > > > Jan, > > > > > > > > I went back to creating a machdep->machspec->user_eframe_offset value > > > > to be able to account for both the 4.7 and the upcoming 4.14 pt_regs > > > > changes: > > > > > > > > https://github.com/crash-utility/crash/commit/c975008e61121ef8785622c3bc26964da8fe0deb > > > > > > > > Again, though, note that "bt" does not work with 4.14. > > > > > > Even with your latest changes in 7.2.0, "bt" still has some issues: > > > a.register dump at exception frame doesn't have correct values > > > (due to added stackframe[] in pt_regs) > > > b. tracing irq stack to process stack fails > > > (due to irq-stack implementation changes and VMAP_STACK) > > > c."bt -o" seems to have been broken for a while > > > > > > Attached is my tentative patch, which hopefully addresses (a) and (b). 
> > > While it is still far from perfect, it may help give you a heads-up. > > > > > > Thanks, > > > -Takahiro AKASHI > > > > > > ===8<=== > > > >From 156ec115b2a436a0738908153d676f8eeed84cb1 Mon Sep 17 00:00:00 2001 > > > From: AKASHI Takahiro <takahiro.akashi@xxxxxxxxxx> > > > Date: Thu, 12 Oct 2017 10:46:34 +0900 > > > Subject: [PATCH] arm64: backtrace for v4.14 > > > > > > --- > > > arm64.c | 172 > > > ++++++++++++++++++++++++++++++++++++++++++++++++++-------------- > > > 1 file changed, 136 insertions(+), 36 deletions(-) > > > > > > diff --git a/arm64.c b/arm64.c > > > index 20c5d34..22c8556 100644 > > > --- a/arm64.c > > > +++ b/arm64.c > > > @@ -72,6 +72,7 @@ static void arm64_cmd_mach(void); > > > static void arm64_display_machine_stats(void); > > > static int arm64_get_smp_cpus(void); > > > static void arm64_clear_machdep_cache(void); > > > +static int arm64_on_process_stack(struct bt_info *, ulong); > > > static int arm64_in_alternate_stack(int, ulong); > > > static int arm64_on_irq_stack(int, ulong); > > > static void arm64_set_irq_stack(struct bt_info *); > > > @@ -1336,29 +1337,60 @@ arm64_irq_stack_init(void) > > > req = &request; > > > struct machine_specific *ms = machdep->machspec; > > > > > > - if (!symbol_exists("irq_stack") || > > > - !(sp = per_cpu_symbol_search("irq_stack")) || > > > - !get_symbol_type("irq_stack", NULL, req) || > > > - (req->typecode != TYPE_CODE_ARRAY) || > > > - (req->target_typecode != TYPE_CODE_INT)) > > > - return; > > > + if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus > > > + * sizeof(ulong))))) > > > + error(FATAL, "cannot malloc irq_stack addresses\n"); > > > > > > - if (CRASHDEBUG(1)) { > > > - fprintf(fp, "irq_stack: \n"); > > > - fprintf(fp, " type: %s\n", > > > - (req->typecode == TYPE_CODE_ARRAY) ? "TYPE_CODE_ARRAY" : "other"); > > > - fprintf(fp, " target_typecode: %s\n", > > > - req->target_typecode == TYPE_CODE_INT ? 
"TYPE_CODE_INT" : "other"); > > > - fprintf(fp, " target_length: %ld\n", req->target_length); > > > - fprintf(fp, " length: %ld\n", req->length); > > > - } > > > + if (symbol_exists("irq_stack") && > > > + (sp = per_cpu_symbol_search("irq_stack")) && > > > + get_symbol_type("irq_stack", NULL, req)) { > > > + /* before v4.14 or CONFIG_VMAP_STACK disabled */ > > > + if (CRASHDEBUG(1)) { > > > + fprintf(fp, "irq_stack: \n"); > > > + fprintf(fp, " type: %s\n", > > > + (req->typecode == TYPE_CODE_ARRAY) ? > > > + "TYPE_CODE_ARRAY" : "other"); > > > + fprintf(fp, " target_typecode: %s\n", > > > + req->target_typecode == TYPE_CODE_INT ? > > > + "TYPE_CODE_INT" : "other"); > > > + fprintf(fp, " target_length: %ld\n", > > > + req->target_length); > > > + fprintf(fp, " length: %ld\n", req->length); > > > + } > > > > > > - ms->irq_stack_size = req->length; > > > - if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * > > > sizeof(ulong))))) > > > - error(FATAL, "cannot malloc irq_stack addresses\n"); > > > + ms->irq_stack_size = req->length; > > > + > > > + for (i = 0; i < kt->cpus; i++) > > > + ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value; > > > + } else if (symbol_exists("irq_stack_ptr") && > > > + (sp = per_cpu_symbol_search("irq_stack_ptr")) && > > > + get_symbol_type("irq_stack_ptr", NULL, req)) { > > > + /* v4.14 and later with CONFIG_VMAP_STACK enabled */ > > > + if (CRASHDEBUG(1)) { > > > + fprintf(fp, "irq_stack_ptr: \n"); > > > + fprintf(fp, " type: %x, %s\n", > > > + (int)req->typecode, > > > + (req->typecode == TYPE_CODE_PTR) ? > > > + "TYPE_CODE_PTR" : "other"); > > > + fprintf(fp, " target_typecode: %x, %s\n", > > > + (int)req->target_typecode, > > > + req->target_typecode == TYPE_CODE_INT ? 
> > > + "TYPE_CODE_INT" : "other"); > > > + fprintf(fp, " target_length: %ld\n", > > > + req->target_length); > > > + fprintf(fp, " length: %ld\n", req->length); > > > + } > > > + > > > + ms->irq_stack_size = 16384; > > > + > > > + for (i = 0; i < kt->cpus; i++) { > > > + ulong p; > > > > > > - for (i = 0; i < kt->cpus; i++) > > > - ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value; > > > + p = kt->__per_cpu_offset[i] + sp->value; > > > + readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong), > > > + "IRQ stack pointer", RETURN_ON_ERROR); > > > + } > > > + } > > > > > > machdep->flags |= IRQ_STACKS; > > > } > > > @@ -1750,11 +1782,20 @@ arm64_display_full_frame(struct bt_info *bt, ulong > > > sp) > > > if (bt->frameptr == sp) > > > return; > > > > > > - if (!INSTACK(sp, bt) || !INSTACK(bt->frameptr, bt)) { > > > - if (sp == 0) > > > - sp = bt->stacktop - USER_EFRAME_OFFSET; > > > - else > > > - return; > > > + if (INSTACK(bt->frameptr, bt)) { > > > + if (INSTACK(sp, bt)) { > > > + /* normal case */; > > > + } else { > > > + if (sp == 0) > > > + /* interrupt in user mode */ > > > + sp = bt->stacktop - USER_EFRAME_OFFSET; > > > + else > > > + /* interrupt in kernel mode */ > > > + sp = bt->stacktop; > > > + } > > > + } else { > > > + error(WARNING, "full display ?\n"); > > > + return; > > > } > > > > > > words = (sp - bt->frameptr) / sizeof(ulong); > > > @@ -1873,6 +1914,25 @@ arm64_unwind_frame(struct bt_info *bt, struct > > > arm64_stackframe *frame) > > > * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer > > > on > > > process stack) > > > */ > > > if (machdep->flags & IRQ_STACKS) { > > > + if (machdep->flags & UNW_4_14) { > > > + if ((bt->flags & BT_IRQSTACK) && > > > + !arm64_on_irq_stack(bt->tc->processor, frame->fp)) { > > > + if (arm64_on_process_stack(bt, frame->fp)) { > > > + arm64_set_process_stack(bt); > > > + > > > + frame->sp = frame->fp - SIZE(pt_regs) + 16; > > > + /* for switch_stack */ > > > + /* fp still points to irq 
stack */ > > > + bt->bptr = fp; > > > + /* for display_full_frame */ > > > + /* sp points to process stack */ > > > + bt->frameptr = frame->sp; > > > + } else { > > > + /* irq -> user */ > > > + return FALSE; > > > + } > > > + } > > > + } else { /* !UNW_4_14 */ > > > ms = machdep->machspec; > > > irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - > > > 16; > > > > > > @@ -1896,6 +1956,7 @@ arm64_unwind_frame(struct bt_info *bt, struct > > > arm64_stackframe *frame) > > > return FALSE; > > > } > > > } > > > + } /* UNW_4_14 */ > > > } > > > > > > return TRUE; > > > @@ -2086,10 +2147,17 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct > > > arm64_stackframe *frame, > > > * We are on process stack. Just add a faked frame > > > */ > > > > > > - if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) > > > - frame->sp = ext_frame.fp > > > - - sizeof(struct arm64_pt_regs); > > > - else { > > > + if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) { > > > + if (MEMBER_EXISTS("pt_regs", "stackframe")) { > > > + frame->sp = ext_frame.fp > > > + - sizeof(struct arm64_pt_regs) - 16; > > > + frame->fp = ext_frame.fp; > > > + } else { > > > + frame->sp = ext_frame.fp > > > + - sizeof(struct arm64_pt_regs); > > > + frame->fp = frame->sp; > > > + } > > > + } else { > > > /* > > > * FIXME: very exceptional case > > > * We are already back on process stack, but > > > @@ -2109,10 +2177,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct > > > arm64_stackframe *frame, > > > * Really ugly > > > */ > > > frame->sp = frame->fp + 0x20; > > > + frame->fp = frame->sp; > > > fprintf(ofp, " (Next exception frame might be wrong)\n"); > > > } > > > > > > - frame->fp = frame->sp; > > > } else { > > > /* We are on IRQ stack */ > > > > > > @@ -2122,9 +2190,15 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct > > > arm64_stackframe *frame, > > > if (ext_frame.fp != irq_stack_ptr) { > > > /* (2) Just add a faked frame */ > > > > > > - frame->sp = ext_frame.fp > 
> > - - sizeof(struct arm64_pt_regs); > > > - frame->fp = frame->sp; > > > + if (MEMBER_EXISTS("pt_regs", "stackframe")) { > > > + frame->sp = ext_frame.fp > > > + - sizeof(struct arm64_pt_regs); > > > + frame->fp = ext_frame.fp; > > > + } else { > > > + frame->sp = ext_frame.fp > > > + - sizeof(struct arm64_pt_regs) - 16; > > > + frame->fp = frame->sp; > > > + } > > > } else { > > > /* > > > * (3) > > > @@ -2304,11 +2378,19 @@ arm64_back_trace_cmd(struct bt_info *bt) > > > if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) > > > { > > > if (!(bt->flags & BT_IRQSTACK) || > > > (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) > > > - exception_frame = stackframe.fp - SIZE(pt_regs); > > > + { > > > + if (MEMBER_EXISTS("pt_regs", "stackframe")) > > > + /* v4.14 or later */ > > > + exception_frame = stackframe.fp > > > + - SIZE(pt_regs) + 16; > > > + else > > > + exception_frame = stackframe.fp > > > + - SIZE(pt_regs); > > > + } > > > } > > > > > > if ((bt->flags & BT_IRQSTACK) && > > > - !arm64_on_irq_stack(bt->tc->processor, stackframe.sp)) { > > > + !arm64_on_irq_stack(bt->tc->processor, stackframe.fp)) { > > > bt->flags &= ~BT_IRQSTACK; > > > if (arm64_switch_stack(bt, &stackframe, ofp) == USER_MODE) > > > break; > > > @@ -2424,6 +2506,8 @@ user_space: > > > * otherwise show an exception frame. > > > * Since exception entry code doesn't have a real > > > * stackframe, we fake a dummy frame here. > > > + * Note: Since we have a real stack frame in pt_regs, > > > + * We no longer need a dummy frame on v4.14 or later. 
> > > */ > > > if (!arm64_in_exp_entry(stackframe.pc)) > > > continue; > > > @@ -2669,7 +2753,9 @@ arm64_switch_stack(struct bt_info *bt, struct > > > arm64_stackframe *frame, FILE *ofp > > > if (frame->fp == 0) > > > return USER_MODE; > > > > > > - arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); > > > + if (!(machdep->flags & UNW_4_14)) > > > + arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); > > > + > > > return KERNEL_MODE; > > > } > > > > > > @@ -3362,6 +3448,20 @@ arm64_clear_machdep_cache(void) { > > > return; > > > } > > > > > > +static int > > > +arm64_on_process_stack(struct bt_info *bt, ulong stkptr) > > > +{ > > > + ulong stackbase, stacktop; > > > + > > > + stackbase = GET_STACKBASE(bt->task); > > > + stacktop = GET_STACKTOP(bt->task); > > > + > > > + if ((stkptr >= stackbase) && (stkptr < stacktop)) > > > + return TRUE; > > > + > > > + return FALSE; > > > +} > > > + > > > static int > > > arm64_on_irq_stack(int cpu, ulong stkptr) > > > { > > > -- > > > 2.14.1 > > > > > > -- > > > Crash-utility mailing list > > > Crash-utility@xxxxxxxxxx > > > https://www.redhat.com/mailman/listinfo/crash-utility > > > > > > > -- > Crash-utility mailing list > Crash-utility@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/crash-utility -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility