Hi Kazu
Here are the latest patches, which add support for running the bt command against a core dump with a kernel stack overflow exception on arm64.
Please help review them and advise if any further changes are needed.
I tested the bt command with the following options:
bt
bt -a
bt -c 3
By the way, the 'mach' command has also been updated to show overflow stack information in the same way as it does for IRQ stacks.
Thanks
Hong
From: HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab@xxxxxxx>
Sent: Wednesday, November 17, 2021 15:23 To: Hong Yang3 杨红 <hong.yang3@xxxxxxx> Cc: Discussion list for crash utility usage, maintenance and development <crash-utility@xxxxxxxxxx> Subject: RE: arm64: Support overflow stack panic 注意:此封邮件来自于公司外部,请注意信息安全!
Attention: This email comes from outside of the company, please pay attention to information security! Hi Hong, Thank you for the patch and for sending it to this list. -----Original Message----- > Hi Crash > > I'll keep refining the patch before it gets approved: OK, so we will wait for the refined patch. Thanks, Kazu > > > 1. Fix the error in arm64_overflow_stack_init() which saved the overflow stack address into > ms->irqstacks[], which would cause the bt command to crash on other cpus. The normal IRQ stacks should be used > for the bt command on other cpus. > 2. In addition to unwinding on the overflow stack, try to go through the IRQ stack to find more useful > information > > Kernel stack overflow cases are rare, but I'd like to sharpen crash to cover this kind of issue. > > Best regards > Hong > ________________________________ > > From: Hong Yang3 杨红 > Sent: Tuesday, November 16, 2021 9:55 > To: crash-utility@xxxxxxxxxx <crash-utility@xxxxxxxxxx> > Subject: arm64: Support overflow stack panic > > Hi All > > When I was trying to open a core dump resulting from an overflow stack panic, the bt command caused a segmentation fault; > after a while I figured out that the overflow stack is not supported by the crash utility. > > This patch initializes the overflow stack information at the startup stage, so that the bt command works > as expected and dumps the correct call trace in the overflow stack. Currently it only applies to the arm64 target. > > I'm not sure if any other subcommand also needs to be fixed for full support of the overflow stack; please > advise and I'll try to improve the patch. > > Thanks > Hong YANG |
From e78fbb586cfc54f6c607b7e8305833b30c8a05a8 Mon Sep 17 00:00:00 2001 From: Hong YANG <hong.yang3@xxxxxxx> Date: Mon, 15 Nov 2021 15:41:01 +0800 Subject: [PATCH 1/2] arm64: Support overflow stack panic Overflow stack exception handling supported since kernel 4.14 in commit 872d8327ce8, this patch trying to load the overflow_stack information on startup and dump back trace in this case. Before: KERNEL: ../vmlinux DUMPFILE: la_guestdump.gcore CPUS: 8 DATE: Tue Jul 13 19:59:44 CST 2021 UPTIME: 00:00:42 LOAD AVERAGE: 3.99, 1.13, 0.39 TASKS: 1925 NODENAME: localhost RELEASE: 4.14.156+ VERSION: #1 SMP PREEMPT Tue Jul 13 10:37:23 UTC 2021 MACHINE: aarch64 (unknown Mhz) MEMORY: 8.7 GB PANIC: "Kernel panic - not syncing: kernel stack overflow" PID: 1969 COMMAND: "irq/139-0-0024" TASK: ffffffcc1a230000 [THREAD_INFO: ffffffcc1a230000] CPU: 0 STATE: TASK_RUNNING (PANIC) crash-7.3.0> bt PID: 1969 TASK: ffffffcc1a230000 CPU: 0 COMMAND: "irq/139-0-0024" Segmentation fault (core dumped) After: crash> bt PID: 1969 TASK: ffffffcc1a230000 CPU: 0 COMMAND: "irq/139-0-0024" #0 [ffffffcc7fd5cf50] __delay at ffffff8008c80774 #1 [ffffffcc7fd5cf60] __const_udelay at ffffff8008c80864 #2 [ffffffcc7fd5cf80] msm_trigger_wdog_bite at ffffff80084e9430 #3 [ffffffcc7fd5cfa0] do_vm_restart at ffffff80087bc974 #4 [ffffffcc7fd5cfc0] machine_restart at ffffff80080856fc #5 [ffffffcc7fd5cfd0] emergency_restart at ffffff80080d49bc #6 [ffffffcc7fd5d140] panic at ffffff80080af4c0 #7 [ffffffcc7fd5d150] nmi_panic at ffffff80080af150 #8 [ffffffcc7fd5d190] handle_bad_stack at ffffff800808b0b8 #9 [ffffffcc7fd5d2d0] __bad_stack at ffffff800808285c --- <Overflow stack> --- #10 [ffffff801187bc60] el1_error_invalid at ffffff8008082e7c #11 [ffffff801187bcc0] cyttsp6_mt_attention at ffffff8000e8498c [cyttsp6] #12 [ffffff801187bd20] call_atten_cb at ffffff8000e82030 [cyttsp6] #13 [ffffff801187bdc0] cyttsp6_irq at ffffff8000e81e34 [cyttsp6] #14 [ffffff801187bdf0] irq_thread_fn at ffffff8008128dd8 #15 [ffffff801187be50] 
irq_thread at ffffff8008128ca4 #16 [ffffff801187beb0] kthread at ffffff80080d2fc4 crash> Signed-off-by: Hong YANG <hong.yang3@xxxxxxx> --- arm64.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- defs.h | 6 +++ 2 files changed, 152 insertions(+), 9 deletions(-) diff --git a/arm64.c b/arm64.c index 94681d1..a2f6559 100644 --- a/arm64.c +++ b/arm64.c @@ -45,6 +45,7 @@ static int arm64_vtop_3level_4k(ulong, ulong, physaddr_t *, int); static int arm64_vtop_4level_4k(ulong, ulong, physaddr_t *, int); static ulong arm64_get_task_pgd(ulong); static void arm64_irq_stack_init(void); +static void arm64_overflow_stack_init(void); static void arm64_stackframe_init(void); static int arm64_eframe_search(struct bt_info *); static int arm64_is_kernel_exception_frame(struct bt_info *, ulong); @@ -63,6 +64,7 @@ static int arm64_get_dumpfile_stackframe(struct bt_info *, struct arm64_stackfra static int arm64_in_kdump_text(struct bt_info *, struct arm64_stackframe *); static int arm64_in_kdump_text_on_irq_stack(struct bt_info *); static int arm64_switch_stack(struct bt_info *, struct arm64_stackframe *, FILE *); +static int arm64_switch_stack_from_overflow(struct bt_info *, struct arm64_stackframe *, FILE *); static int arm64_get_stackframe(struct bt_info *, struct arm64_stackframe *); static void arm64_get_stack_frame(struct bt_info *, ulong *, ulong *); static void arm64_gen_hidden_frame(struct bt_info *bt, ulong, struct arm64_stackframe *); @@ -78,8 +80,11 @@ static int arm64_get_smp_cpus(void); static void arm64_clear_machdep_cache(void); static int arm64_on_process_stack(struct bt_info *, ulong); static int arm64_in_alternate_stack(int, ulong); +static int arm64_in_alternate_stackv(int cpu, ulong stkptr, ulong *stacks, ulong stack_size); static int arm64_on_irq_stack(int, ulong); +static int arm64_on_overflow_stack(int, ulong); static void arm64_set_irq_stack(struct bt_info *); +static void arm64_set_overflow_stack(struct bt_info *); static void 
arm64_set_process_stack(struct bt_info *); static int arm64_get_kvaddr_ranges(struct vaddr_range *); static void arm64_get_crash_notes(void); @@ -463,6 +468,7 @@ arm64_init(int when) machdep->hz = 100; arm64_irq_stack_init(); + arm64_overflow_stack_init(); arm64_stackframe_init(); break; @@ -1715,6 +1721,49 @@ arm64_irq_stack_init(void) } } +/* + * Gather Overflow stack values. + * + * Overflow stack supported since 4.14, in commit 872d8327c + */ +static void +arm64_overflow_stack_init(void) +{ + int i; + struct syment *sp; + struct gnu_request request, *req; + struct machine_specific *ms = machdep->machspec; + req = &request; + + if (symbol_exists("overflow_stack") && + (sp = per_cpu_symbol_search("overflow_stack")) && + get_symbol_type("overflow_stack", NULL, req)) { + if (CRASHDEBUG(1)) { + fprintf(fp, "overflow_stack: \n"); + fprintf(fp, " type: %x, %s\n", + (int)req->typecode, + (req->typecode == TYPE_CODE_PTR) ? + "TYPE_CODE_PTR" : "other"); + fprintf(fp, " target_typecode: %x, %s\n", + (int)req->target_typecode, + req->target_typecode == TYPE_CODE_INT ? + "TYPE_CODE_INT" : "other"); + fprintf(fp, " target_length: %ld\n", + req->target_length); + fprintf(fp, " length: %ld\n", req->length); + } + + if (!(ms->overflow_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) + error(FATAL, "cannot malloc overflow_stack addresses\n"); + + ms->overflow_stack_size = ARM64_OVERFLOW_STACK_SIZE; + machdep->flags |= OVERFLOW_STACKS; + + for (i = 0; i < kt->cpus; i++) + ms->overflow_stacks[i] = kt->__per_cpu_offset[i] + sp->value; + } +} + /* * Gather and verify all of the backtrace requirements. 
*/ @@ -2255,12 +2304,14 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame) if (!(machdep->flags & IRQ_STACKS)) return TRUE; - if (!(machdep->flags & IRQ_STACKS)) + if (!(machdep->flags & OVERFLOW_STACKS)) return TRUE; if (machdep->flags & UNW_4_14) { - if ((bt->flags & BT_IRQSTACK) && - !arm64_on_irq_stack(bt->tc->processor, frame->fp)) { + if (((bt->flags & BT_IRQSTACK) && + !arm64_on_irq_stack(bt->tc->processor, frame->fp)) || + ((bt->flags & BT_OVERFLOW_STACK) && + !arm64_on_overflow_stack(bt->tc->processor, frame->fp))) { if (arm64_on_process_stack(bt, frame->fp)) { arm64_set_process_stack(bt); @@ -2673,6 +2724,12 @@ arm64_back_trace_cmd(struct bt_info *bt) bt->hp->eip : GET_STACK_ULONG(bt->hp->esp); stackframe.sp = bt->hp->esp + 8; bt->flags &= ~BT_REGS_NOT_FOUND; + } else if (arm64_on_overflow_stack(bt->tc->processor, bt->frameptr)) { + arm64_set_overflow_stack(bt); + bt->flags |= BT_OVERFLOW_STACK; + stackframe.sp = bt->stkptr; + stackframe.pc = bt->instptr; + stackframe.fp = bt->frameptr; } else { if (arm64_on_irq_stack(bt->tc->processor, bt->frameptr)) { arm64_set_irq_stack(bt); @@ -2732,6 +2789,7 @@ arm64_back_trace_cmd(struct bt_info *bt) if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) { if (!(bt->flags & BT_IRQSTACK) || + !(bt->flags & BT_OVERFLOW_STACK) || ((stackframe.sp + SIZE(pt_regs)) < bt->stacktop)) { if (arm64_is_kernel_exception_frame(bt, stackframe.fp - KERN_EFRAME_OFFSET)) exception_frame = stackframe.fp - KERN_EFRAME_OFFSET; @@ -2745,6 +2803,12 @@ arm64_back_trace_cmd(struct bt_info *bt) break; } + if ((bt->flags & BT_OVERFLOW_STACK) && + !arm64_on_overflow_stack(bt->tc->processor, stackframe.fp)) { + bt->flags &= ~BT_OVERFLOW_STACK; + if (arm64_switch_stack_from_overflow(bt, &stackframe, ofp) == USER_MODE) + break; + } level++; } @@ -3131,6 +3195,43 @@ arm64_switch_stack(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp return KERNEL_MODE; } +static int 
+arm64_switch_stack_from_overflow(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp) +{ + int i; + ulong stacktop, words, addr; + ulong *stackbuf; + char buf[BUFSIZE]; + struct machine_specific *ms = machdep->machspec; + + if (bt->flags & BT_FULL) { + stacktop = ms->overflow_stacks[bt->tc->processor] + ms->overflow_stack_size; + words = (stacktop - bt->bptr) / sizeof(ulong); + stackbuf = (ulong *)GETBUF(words * sizeof(ulong)); + readmem(bt->bptr, KVADDR, stackbuf, words * sizeof(long), + "top of overflow stack", FAULT_ON_ERROR); + + addr = bt->bptr; + for (i = 0; i < words; i++) { + if (!(i & 1)) + fprintf(ofp, "%s %lx: ", i ? "\n" : "", addr); + fprintf(ofp, "%s ", format_stack_entry(bt, buf, stackbuf[i], 0)); + addr += sizeof(ulong); + } + fprintf(ofp, "\n"); + FREEBUF(stackbuf); + } + fprintf(ofp, "--- <Overflow stack> ---\n"); + + if (frame->fp == 0) + return USER_MODE; + + if (!(machdep->flags & UNW_4_14)) + arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp); + + return KERNEL_MODE; +} + static int arm64_get_dumpfile_stackframe(struct bt_info *bt, struct arm64_stackframe *frame) { @@ -3682,6 +3783,16 @@ arm64_display_machine_stats(void) machdep->machspec->irq_stacks[i]); } } + if (machdep->machspec->overflow_stack_size) { + fprintf(fp, "OVERFLOW STACK SIZE: %ld\n", + machdep->machspec->overflow_stack_size); + fprintf(fp, " OVERFLOW STACKS:\n"); + for (i = 0; i < kt->cpus; i++) { + pad = (i < 10) ? 3 : (i < 100) ? 2 : (i < 1000) ? 
1 : 0; + fprintf(fp, "%s CPU %d: %lx\n", space(pad), i, + machdep->machspec->overflow_stacks[i]); + } + } } static int @@ -3881,20 +3992,36 @@ arm64_on_irq_stack(int cpu, ulong stkptr) } static int -arm64_in_alternate_stack(int cpu, ulong stkptr) +arm64_in_alternate_stackv(int cpu, ulong stkptr, ulong *stacks, ulong stack_size) { - struct machine_specific *ms = machdep->machspec; - - if (!ms->irq_stack_size || (cpu >= kt->cpus)) + if (!stack_size || (cpu >= kt->cpus)) return FALSE; - if ((stkptr >= ms->irq_stacks[cpu]) && - (stkptr < (ms->irq_stacks[cpu] + ms->irq_stack_size))) + if ((stkptr >= stacks[cpu]) && + (stkptr < (stacks[cpu] + stack_size))) return TRUE; return FALSE; } +static int +arm64_in_alternate_stack(int cpu, ulong stkptr) +{ + struct machine_specific *ms = machdep->machspec; + + return arm64_in_alternate_stackv(cpu, stkptr, + ms->irq_stacks, ms->irq_stack_size); +} + +static int +arm64_on_overflow_stack(int cpu, ulong stkptr) +{ + struct machine_specific *ms = machdep->machspec; + + return arm64_in_alternate_stackv(cpu, stkptr, + ms->overflow_stacks, ms->overflow_stack_size); +} + static void arm64_set_irq_stack(struct bt_info *bt) { @@ -3905,6 +4032,16 @@ arm64_set_irq_stack(struct bt_info *bt) alter_stackbuf(bt); } +static void +arm64_set_overflow_stack(struct bt_info *bt) +{ + struct machine_specific *ms = machdep->machspec; + + bt->stackbase = ms->overflow_stacks[bt->tc->processor]; + bt->stacktop = bt->stackbase + ms->overflow_stack_size; + alter_stackbuf(bt); +} + static void arm64_set_process_stack(struct bt_info *bt) { diff --git a/defs.h b/defs.h index a2f3085..7e2a16e 100644 --- a/defs.h +++ b/defs.h @@ -3218,6 +3218,7 @@ typedef signed int s32; #define UNW_4_14 (0x200) #define FLIPPED_VM (0x400) #define HAS_PHYSVIRT_OFFSET (0x800) +#define OVERFLOW_STACKS (0x1000) /* * Get kimage_voffset from /dev/crash @@ -3260,6 +3261,7 @@ typedef signed int s32; #define ARM64_STACK_SIZE (16384) #define ARM64_IRQ_STACK_SIZE ARM64_STACK_SIZE +#define 
ARM64_OVERFLOW_STACK_SIZE (4096) #define _SECTION_SIZE_BITS 30 #define _SECTION_SIZE_BITS_5_12 27 @@ -3332,6 +3334,9 @@ struct machine_specific { char *irq_stackbuf; ulong __irqentry_text_start; ulong __irqentry_text_end; + ulong overflow_stack_size; + ulong *overflow_stacks; + char *overflow_stackbuf; /* for exception vector code */ ulong exp_entry1_start; ulong exp_entry1_end; @@ -5770,6 +5775,7 @@ ulong cpu_map_addr(const char *type); #define BT_CPUMASK (0x1000000000000ULL) #define BT_SHOW_ALL_REGS (0x2000000000000ULL) #define BT_REGS_NOT_FOUND (0x4000000000000ULL) +#define BT_OVERFLOW_STACK (0x8000000000000ULL) #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS) #define BT_REF_HEXVAL (0x1) -- 2.25.1
From b20c213f05c7d971b1d57c49a705aaf3f73d0afe Mon Sep 17 00:00:00 2001 From: Hong YANG <hong.yang3@xxxxxxx> Date: Wed, 17 Nov 2021 17:21:10 +0800 Subject: [PATCH 2/2] arm64: Dump stack overflow exception frame info The overflow stack exception frame is right after the handle_bad_stack() frame, we print it out immediately after found this symbol on the overflow stack. crash> bt PID: 1969 TASK: ffffffcc1a230000 CPU: 0 COMMAND: "irq/139-0-0024" ... #6 [ffffffcc7fd5d140] panic at ffffff80080af4c0 #7 [ffffffcc7fd5d150] nmi_panic at ffffff80080af150 #8 [ffffffcc7fd5d190] handle_bad_stack at ffffff800808b0b8 #9 [ffffffcc7fd5d2d0] __bad_stack at ffffff800808285c PC: ffffff8008082e80 [el1_sync] LR: ffffff8000e84c68 [cyttsp6_get_mt_touches+144] SP: dfffff80218780e0 PSTATE: 204003c5 X29: ffffff801187bc60 X28: 0000000000000000 X27: ffffffcc1a1d45e8 X26: 0000000000000000 X25: ffffffcc1a1d4000 X24: 0000000000000000 X23: 0000000000000000 X22: ffffffcc55562020 X21: 0000000000000001 X20: ffffffcc1a1d4118 X19: 0000000000000001 X18: 00000000fffdfdfc X17: 00000000000dfdfc X16: 00000000000dfdfc X15: 00000000000000e6 X14: 000000000000007c X13: 0000000000020280 X12: 0000000000000000 X11: 0000000000000001 X10: ffffffcc1a1d45e8 X9: 0000000000000000 X8: dfffff802187bba0 X7: 0000000000000000 X6: 000000000000003f X5: 0000000000000040 X4: 0000000000000020 X3: 000000000fffffe0 X2: 000000000fffffe0 X1: 0000000000000000 X0: dfffff802187bba0 --- <Overflow stack> --- #10 [ffffff801187bc60] el1_error_invalid at ffffff8008082e7c ... 
#14 [ffffff801187bdf0] irq_thread_fn at ffffff8008128dd8 #15 [ffffff801187be50] irq_thread at ffffff8008128ca4 #16 [ffffff801187beb0] kthread at ffffff80080d2fc4 Signed-off-by: Hong YANG <hong.yang3@xxxxxxx> --- arm64.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arm64.c b/arm64.c index a2f6559..54a1037 100644 --- a/arm64.c +++ b/arm64.c @@ -2009,6 +2009,7 @@ static char *arm64_exception_functions[] = { "do_mem_abort", "do_el0_irq_bp_hardening", "do_sp_pc_abort", + "handle_bad_stack", NULL }; @@ -2027,7 +2028,10 @@ arm64_in_exception_text(ulong ptr) if ((ptr >= ms->__exception_text_start) && (ptr < ms->__exception_text_end)) return TRUE; - } else if ((name = closest_symbol(ptr))) { /* Linux 5.5 and later */ + } + + name = closest_symbol(ptr); + if (name != NULL) { /* Linux 5.5 and later */ for (func = &arm64_exception_functions[0]; *func; func++) { if (STREQ(name, *func)) return TRUE; @@ -2788,8 +2792,9 @@ arm64_back_trace_cmd(struct bt_info *bt) break; if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) { - if (!(bt->flags & BT_IRQSTACK) || - !(bt->flags & BT_OVERFLOW_STACK) || + if (bt->flags & BT_OVERFLOW_STACK) { + exception_frame = stackframe.fp - KERN_EFRAME_OFFSET; + } else if (!(bt->flags & BT_IRQSTACK) || ((stackframe.sp + SIZE(pt_regs)) < bt->stacktop)) { if (arm64_is_kernel_exception_frame(bt, stackframe.fp - KERN_EFRAME_OFFSET)) exception_frame = stackframe.fp - KERN_EFRAME_OFFSET; -- 2.25.1
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility