The gdb target analyzes only one task at a time and it backtraces only a straight C stack until the end of the stack. If stacks were concatenated during exceptions or interrupts, gdb bt will show only the topmost one. Introduce multiple stacks support in the gdb target, which can be observed as different threads from gdb's perspective. 'gdb info threads' - to see the list of in-kernel stacks for the given task. 'gdb thread <Id>' - to switch. 'gdb bt' - to show it. Implementation is machine specific. In x86_64, I use cmd_bt() to add additional gdb threads (gdb_add_substack(stack_id) call). Once added, gdb may call machdep->get_current_task_reg() with the corresponding stack_id (sid: new argument). Note: the crash 'bt' command must be called for additional threads to appear. No threads/stacks support for arm64 and ppc64, x86_64 only. Example of a #GP fault in the kernel caught by the SCTP task: crash> bt PID: 94228 TASK: ffff96a6766a8000 CPU: 31 COMMAND: "SCTP" #0 [ffffbb67437e7220] panic at ffffffff99b4f60b #1 [ffffbb67437e72c0] die_addr at ffffffff99033650 #2 [ffffbb67437e72f0] exc_general_protection at ffffffff99b9194b #3 [ffffbb67437e7390] asm_exc_general_protection at ffffffff99c00b47 [exception RIP: crypto_aead_encrypt+9] RIP: ffffffff995ce269 RSP: ffffbb67437e7440 RFLAGS: 00010246 RAX: 0fdd59d2b3d89ecb RBX: 0000000000000000 RCX: 0000000000000c90 RDX: ffff96a368508110 RSI: 0000000000000000 RDI: ffff96a348352060 RBP: ffffbb67437e7650 R8: 0000000000000001 R9: ffff96a3685080c8 R10: ffff96a348351c78 R11: 00000000d5a09e53 R12: 0000000000000008 R13: ffff96a348352010 R14: ffff96a348352000 R15: 0000000000000001 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #4 [ffffbb67437e7440] echainiv_encrypt at ffffffffc0ae82c2 [echainiv] #5 [ffffbb67437e7658] crypto_aead_encrypt at ffffffff995ce27c #6 [ffffbb67437e7668] esp_output_tail at ffffffffc0add3fc [esp4] #7 [ffffbb67437e76f8] esp_output at ffffffffc0addedf [esp4] #8 [ffffbb67437e7760] xfrm_output_resume at ffffffff99a9186a #9 [ffffbb67437e77e0] xfrm_output at 
ffffffff99a91fba #10 [ffffbb67437e7810] __xfrm4_output at ffffffff99a7b0e6 #11 [ffffbb67437e7820] xfrm4_output at ffffffff99a7b172 #12 [ffffbb67437e7890] ip_local_out at ffffffff99a000ef #13 [ffffbb67437e78b8] __ip_queue_xmit at ffffffff99a0028e #14 [ffffbb67437e7918] sctp_v4_xmit at ffffffffc0afe0f8 [sctp] #15 [ffffbb67437e79f0] sctp_packet_singleton at ffffffffc0b0bc47 [sctp] #16 [ffffbb67437e7a60] sctp_outq_flush at ffffffffc0b0c636 [sctp] #17 [ffffbb67437e7b08] sctp_outq_uncork at ffffffffc0b0d85c [sctp] #18 [ffffbb67437e7b18] sctp_do_sm at ffffffffc0afbaa6 [sctp] #19 [ffffbb67437e7d08] __sctp_connect at ffffffffc0b17893 [sctp] #20 [ffffbb67437e7d78] __sctp_setsockopt_connectx at ffffffffc0b17a6d [sctp] #21 [ffffbb67437e7da8] sctp_getsockopt at ffffffffc0b1c892 [sctp] #22 [ffffbb67437e7eb8] sock_common_getsockopt at ffffffff9993c6e7 #23 [ffffbb67437e7ec8] __sys_getsockopt at ffffffff9993afac #24 [ffffbb67437e7f18] __x64_sys_getsockopt at ffffffff9993b0bf #25 [ffffbb67437e7f28] x64_sys_call at ffffffff99004ca5 #26 [ffffbb67437e7f38] do_syscall_64 at ffffffff99b90e34 #27 [ffffbb67437e7f50] entry_SYSCALL_64_after_hwframe at ffffffff99c00126 RIP: 00007f12c63028ea RSP: 00007f10e41d9b28 RFLAGS: 00000206 RAX: ffffffffffffffda RBX: 0000000000000050 RCX: 00007f12c63028ea RDX: 000000000000006f RSI: 0000000000000084 RDI: 0000000000000050 RBP: 00007f10a00009b0 R8: 00007f10e41d9b3c R9: 00007f10ac000a5c R10: 00007f10e41d9b40 R11: 0000000000000206 R12: 00007f10e41db120 R13: 0000000000000050 R14: 0000000000000010 R15: 000000000289e070 ORIG_RAX: 0000000000000037 CS: 0033 SS: 002b crash> gdb bt #0 0xffffffff998eaadf in __inb (port=100) at ./arch/x86/include/asm/shared/io.h:22 #1 i8042_read_status () at drivers/input/serio/i8042-acpipnpio.h:54 #2 i8042_panic_blink (state=<optimized out>) at drivers/input/serio/i8042.c:1137 #3 0xffffffff99b4f60b in panic (fmt=fmt@entry=0xffffffff9a42c4cb "Fatal exception") at kernel/panic.c:460 #4 0xffffffff99b49b84 in oops_end (flags=<optimized 
out>, flags@entry=582, regs=<optimized out>, regs@entry=0xffffbb67437e7398, signr=<optimized out>) at arch/x86/kernel/dumpstack.c:382 #5 0xffffffff99033650 in die_addr (str=str@entry=0xffffbb67437e7304 "general protection fault, probably for non-canonical address 0xfdd59d2b3d89edb", regs=regs@entry=0xffffbb67437e7398, err=err@entry=0, gp_addr=<optimized out>) at arch/x86/kernel/dumpstack.c:462 #6 0xffffffff99b9194b in __exc_general_protection (error_code=0, regs=0xffffbb67437e7398) at arch/x86/kernel/traps.c:784 #7 exc_general_protection (regs=0xffffbb67437e7398, error_code=0) at arch/x86/kernel/traps.c:729 #8 0xffffffff99c00b47 in asm_exc_general_protection () at ./arch/x86/include/asm/idtentry.h:564 crash> gdb info threads Id Target Id Frame * 1 94228 SCTP (stack 0) 0xffffffff998eaadf in __inb (port=100) at ./arch/x86/include/asm/shared/io.h:22 2 94228 SCTP (stack 1) crypto_aead_encrypt (req=req@entry=0xffff96a348352060) at crypto/aead.c:86 crash> gdb thread 2 [Switching to thread 2 (94228 SCTP (stack 1))] #0 crypto_aead_encrypt (req=req@entry=0xffff96a348352060) at crypto/aead.c:86 86 crypto/aead.c: No such file or directory. 
crash> gdb bt #0 crypto_aead_encrypt (req=req@entry=0xffff96a348352060) at crypto/aead.c:86 #1 0xffffffffc0ae82c2 in echainiv_encrypt (req=0xffff96a348352010) at crypto/echainiv.c:82 #2 0xffffffff995ce27c in crypto_aead_encrypt (req=0xffff96a348352060) at crypto/aead.c:94 #3 0xffffffffc0add3fc in esp_output_tail () #4 0xffffffffc0addedf in esp_output () #5 0xffffffff99a9186a in xfrm_output_one (err=0, skb=0xffff96a3c852b300) at net/xfrm/xfrm_output.c:553 #6 xfrm_output_resume (sk=sk@entry=0xffff96a348368000, skb=skb@entry=0xffff96a3c852b300, err=<optimized out>, err@entry=1) at net/xfrm/xfrm_output.c:588 #7 0xffffffff99a91fba in xfrm_output2 (skb=0xffff96a3c852b300, sk=0xffff96a348368000, net=0xffff96a365582580) at net/xfrm/xfrm_output.c:615 #8 xfrm_output (sk=0xffff96a348368000, skb=0xffff96a3c852b300) at net/xfrm/xfrm_output.c:765 #9 0xffffffff99a7b0e6 in __xfrm4_output (net=<optimized out>, sk=<optimized out>, skb=<optimized out>) at net/ipv4/xfrm4_output.c:28 #10 0xffffffff99a7b172 in NF_HOOK_COND (pf=2 '\002', hook=4, okfn=0xffffffff99a7b0c0 <__xfrm4_output>, cond=<optimized out>, out=0xffff96a496ff2000, in=0x0, skb=0xffff96a3c852b300, sk=0xffff96a348368000, net=0xffff96a365582580) at ./include/linux/netfilter.h:291 #11 xfrm4_output (net=0xffff96a365582580, sk=0xffff96a348368000, skb=0xffff96a3c852b300) at net/ipv4/xfrm4_output.c:33 #12 0xffffffff99a000ef in dst_output (skb=0xffff96a368508110, sk=0x0, net=0xffff96a348352060) at ./include/net/dst.h:444 #13 ip_local_out (net=0xffff96a348352060, sk=0x0, skb=0xffff96a368508110) at net/ipv4/ip_output.c:126 #14 0xffffffff99a0028e in __ip_queue_xmit (sk=sk@entry=0xffff96a348368000, skb=skb@entry=0xffff96a3c852b300, fl=fl@entry=0xffff96a348351830, tos=tos@entry=186 '\272') at net/ipv4/ip_output.c:532 #15 0xffffffffc0afe0f8 in sctp_v4_xmit (skb=0xffff96a3c852b300, t=0xffff96a348351800) at net/sctp/protocol.c:1071 #16 0xffffffffc0b1f553 in sctp_packet_transmit (packet=packet@entry=0xffffbb67437e79f8, gfp=gfp@entry=3264) 
at net/sctp/output.c:653 #17 0xffffffffc0b0bc47 in sctp_packet_singleton (transport=<optimized out>, chunk=chunk@entry=0xffff96a34c96f500, gfp=3264) at net/sctp/outqueue.c:783 #18 0xffffffffc0b0c636 in sctp_outq_flush_ctrl (ctx=0xffffbb67437e7aa0) at net/sctp/outqueue.c:914 #19 sctp_outq_flush (q=0xffff96a3483585b8, rtx_timeout=rtx_timeout@entry=0, gfp=<optimized out>) at net/sctp/outqueue.c:1212 #20 0xffffffffc0b0d85c in sctp_outq_uncork (q=q@entry=0xffff96a3483585b8, gfp=gfp@entry=3264) at net/sctp/outqueue.c:764 #21 0xffffffffc0afbaa6 in sctp_cmd_interpreter (state=<optimized out>, status=<optimized out>, gfp=<optimized out>, commands=0xffffbb67437e7b68, event_arg=<optimized out>, asoc=0xffff96a348358000, ep=<optimized out>, subtype=..., event_type=<optimized out>) at net/sctp/sm_sideeffect.c:1819 #22 sctp_side_effects (gfp=<optimized out>, commands=0xffffbb67437e7b68, status=<optimized out>, event_arg=<optimized out>, asoc=<synthetic pointer>, ep=<optimized out>, state=<optimized out>, subtype=..., event_type=<optimized out>) at net/sctp/sm_sideeffect.c:1199 #23 sctp_do_sm (net=<optimized out>, event_type=event_type@entry=SCTP_EVENT_T_PRIMITIVE, subtype=..., subtype@entry=..., state=<optimized out>, ep=<optimized out>, asoc=<optimized out>, event_arg=<optimized out>, gfp=<optimized out>) at net/sctp/sm_sideeffect.c:1170 #24 0xffffffffc0b1e2f0 in sctp_primitive_ASSOCIATE (net=<optimized out>, asoc=asoc@entry=0xffff96a348358000, arg=arg@entry=0x0) at net/sctp/primitive.c:73 #25 0xffffffffc0b17893 in __sctp_connect (sk=sk@entry=0xffff96a348368000, kaddrs=kaddrs@entry=0xffff96a342085030, addrs_size=addrs_size@entry=16, flags=2050, assoc_id=assoc_id@entry=0xffffbb67437e7df4) at ./include/net/net_namespace.h:369 #26 0xffffffffc0b17a6d in __sctp_setsockopt_connectx (sk=sk@entry=0xffff96a348368000, kaddrs=kaddrs@entry=0xffff96a342085030, addrs_size=16, assoc_id=assoc_id@entry=0xffffbb67437e7df4) at net/sctp/socket.c:1334 #27 0xffffffffc0b1c892 in 
sctp_getsockopt_connectx3 (optlen=0x7f10e41d9b3c, optval=0x7f10e41d9b40 <error: Cannot access memory at address 0x7f10e41d9b40>, len=16, sk=0xffff96a348368000) at net/sctp/socket.c:1419 #28 sctp_getsockopt (sk=0xffff96a348368000, level=<optimized out>, optname=<optimized out>, optval=0x7f10e41d9b40 <error: Cannot access memory at address 0x7f10e41d9b40>, optlen=<optimized out>) at net/sctp/socket.c:8124 #29 0xffffffff9993c6e7 in sock_common_getsockopt (sock=<optimized out>, level=0, optname=1750106384, optval=0xc90 <error: Cannot access memory at address 0xc90>, optlen=0x1) at net/core/sock.c:3652 #30 0xffffffff9993afac in __sys_getsockopt (fd=<optimized out>, level=132, optname=111, optval=0x7f10e41d9b40 <error: Cannot access memory at address 0x7f10e41d9b40>, optlen=<optimized out>) at net/socket.c:2327 #31 0xffffffff9993b0bf in __do_sys_getsockopt (optlen=<optimized out>, optval=<optimized out>, optname=<optimized out>, level=<optimized out>, fd=<optimized out>) at net/socket.c:2342 #32 __se_sys_getsockopt (optlen=<optimized out>, optval=<optimized out>, optname=<optimized out>, level=<optimized out>, fd=<optimized out>) at net/socket.c:2339 #33 __x64_sys_getsockopt (regs=<optimized out>) at net/socket.c:2339 #34 0xffffffff99004ca5 in x64_sys_call (regs=regs@entry=0xffffbb67437e7f58, nr=<optimized out>) at ./arch/x86/include/generated/asm/syscalls_64.h:56 #35 0xffffffff99b90e34 in do_syscall_x64 (nr=<optimized out>, regs=0xffffbb67437e7f58) at arch/x86/entry/common.c:51 #36 do_syscall_64 (regs=0xffffbb67437e7f58, nr=<optimized out>) at arch/x86/entry/common.c:81 #37 0xffffffff99c00126 in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:121 Now we can use GDB to see the root cause. 
Signed-off-by: Alexey Makhalov <alexey.makhalov@xxxxxxxxxxxx> --- arm64.c | 2 +- crash_target.c | 25 ++++++++++++++++++---- defs.h | 3 ++- gdb_interface.c | 6 +++--- ppc64.c | 2 +- x86_64.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 81 insertions(+), 12 deletions(-) diff --git a/arm64.c b/arm64.c index 608b19d..62f91d8 100644 --- a/arm64.c +++ b/arm64.c @@ -204,7 +204,7 @@ out: static int arm64_get_current_task_reg(int regno, const char *name, - int size, void *value) + int size, void *value, int unused) { struct bt_info bt_info, bt_setup; struct task_context *tc; diff --git a/crash_target.c b/crash_target.c index 1080976..8b17ef8 100644 --- a/crash_target.c +++ b/crash_target.c @@ -27,8 +27,9 @@ void crash_target_init (void); extern "C" int gdb_readmem_callback(unsigned long, void *, int, int); extern "C" int crash_get_current_task_reg (int regno, const char *regname, - int regsize, void *val); + int regsize, void *val, int sid); extern "C" int gdb_change_thread_context (void); +extern "C" int gdb_add_substack (int sid); extern "C" void crash_get_current_task_info(unsigned long *pid, char **comm); /* The crash target. 
*/ @@ -64,9 +65,10 @@ public: unsigned long pid; char *comm; crash_get_current_task_info(&pid, &comm); - return string_printf ("%ld %s", pid, comm); + if (thread_count(this) == 1) + return string_printf ("%ld %s", pid, comm); + return string_printf ("%ld %s (stack %ld)", pid, comm, ptid.tid()); } - }; static void supply_registers(struct regcache *regcache, int regno) @@ -79,7 +81,7 @@ static void supply_registers(struct regcache *regcache, int regno) if (regsize > sizeof (regval)) error (_("fatal error: buffer size is not enough to fit register value")); - if (crash_get_current_task_reg (regno, regname, regsize, (void *)&regval)) + if (crash_get_current_task_reg (regno, regname, regsize, (void *)&regval, inferior_thread()->ptid.tid())) regcache->raw_supply (regno, regval); else regcache->raw_supply (regno, NULL); @@ -144,7 +146,22 @@ crash_target_init (void) extern "C" int gdb_change_thread_context (void) { + for (thread_info *tp : current_inferior()->threads_safe()) + if (tp->ptid.tid_p()) + delete_thread (tp); target_fetch_registers(get_current_regcache(), -1); reinit_frame_cache(); return TRUE; } + +/* Add a thread for each additional stack. 
Use stack ID as a thread ID */ +extern "C" int +gdb_add_substack (int sid) +{ + ptid_t ptid = ptid_t(CRASH_INFERIOR_PID, 0, sid); + + thread_info *tp = find_thread_ptid (current_inferior(), ptid); + if (tp == nullptr) + add_thread_silent (current_inferior()->process_target(), ptid); + return TRUE; +} diff --git a/defs.h b/defs.h index b93a7a6..bb2bc20 100644 --- a/defs.h +++ b/defs.h @@ -1081,7 +1081,7 @@ struct machdep_table { void (*get_irq_affinity)(int); void (*show_interrupts)(int, ulong *); int (*is_page_ptr)(ulong, physaddr_t *); - int (*get_current_task_reg)(int, const char *, int, void *); + int (*get_current_task_reg)(int, const char *, int, void *, int); int (*is_cpu_prstatus_valid)(int cpu); }; @@ -8301,5 +8301,6 @@ enum ppc64_regnum { /* crash_target.c */ extern int gdb_change_thread_context (void); +extern int gdb_add_substack (int sid); #endif /* !GDB_COMMON */ diff --git a/gdb_interface.c b/gdb_interface.c index 315711e..c138c94 100644 --- a/gdb_interface.c +++ b/gdb_interface.c @@ -1074,12 +1074,12 @@ unsigned long crash_get_kaslr_offset(void) /* Callbacks for crash_target */ int crash_get_current_task_reg (int regno, const char *regname, - int regsize, void *value); + int regsize, void *value, int sid); int crash_get_current_task_reg (int regno, const char *regname, - int regsize, void *value) + int regsize, void *value, int sid) { if (!machdep->get_current_task_reg) return FALSE; - return machdep->get_current_task_reg(regno, regname, regsize, value); + return machdep->get_current_task_reg(regno, regname, regsize, value, sid); } diff --git a/ppc64.c b/ppc64.c index 782107b..1cf06e3 100644 --- a/ppc64.c +++ b/ppc64.c @@ -2512,7 +2512,7 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs, static int ppc64_get_current_task_reg(int regno, const char *name, int size, - void *value) + void *value, int unused) { struct bt_info bt_info, bt_setup; struct task_context *tc; diff --git a/x86_64.c b/x86_64.c index e7f8fe2..2e7cde4 100644 --- 
a/x86_64.c +++ b/x86_64.c @@ -126,7 +126,7 @@ static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *); static void x86_64_framesize_debug(struct bt_info *); static void x86_64_get_active_set(void); static int x86_64_get_kvaddr_ranges(struct vaddr_range *); -static int x86_64_get_current_task_reg(int, const char *, int, void *); +static int x86_64_get_current_task_reg(int, const char *, int, void *, int); static int x86_64_verify_paddr(uint64_t); static void GART_init(void); static void x86_64_exception_stacks_init(void); @@ -143,6 +143,14 @@ struct machine_specific x86_64_machine_specific = { 0 }; static const char *exception_functions_orig[]; static const char *exception_functions_5_8[]; +/* + * Additional stacks entry registers for gdb target. + * See 'gdb info threads' + */ +#define MAX_STACKS_NUM 5 +ulong stack_idx; +ulong stacks_regs[MAX_STACKS_NUM][SS_REGNUM + 1]; + /* Use this hardwired version -- sometimes the * debuginfo doesn't pick this up even though * it exists in the kernel; it shouldn't change. @@ -3551,6 +3559,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in) irq_eframe = 0; last_process_stack_eframe = 0; bt->call_target = NULL; + stack_idx = 0; rsp = bt->stkptr; ms = machdep->machspec; @@ -4159,6 +4168,7 @@ x86_64_dwarf_back_trace_cmd(struct bt_info *bt_in) last_process_stack_eframe = 0; bt->call_target = NULL; bt->bptr = 0; + stack_idx = 0; rsp = bt->stkptr; if (!rsp) { error(INFO, "cannot determine starting stack pointer\n"); @@ -4799,6 +4809,36 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local, } else if (machdep->flags & ORC) bt->bptr = rbp; + + /* + * Preserve registers set for each additional in-kernel stack + * up to MAX_STACKS_NUM. 
+ */ + if (!(cs & 3) && verified && stack_idx < MAX_STACKS_NUM) { + stacks_regs[stack_idx][RAX_REGNUM] = rax; + stacks_regs[stack_idx][RBX_REGNUM] = rbx; + stacks_regs[stack_idx][RCX_REGNUM] = rcx; + stacks_regs[stack_idx][RDX_REGNUM] = rdx; + stacks_regs[stack_idx][RSI_REGNUM] = rsi; + stacks_regs[stack_idx][RDI_REGNUM] = rdi; + stacks_regs[stack_idx][RBP_REGNUM] = rbp; + stacks_regs[stack_idx][RSP_REGNUM] = rsp; + stacks_regs[stack_idx][R8_REGNUM] = r8; + stacks_regs[stack_idx][R9_REGNUM] = r9; + stacks_regs[stack_idx][R10_REGNUM] = r10; + stacks_regs[stack_idx][R11_REGNUM] = r11; + stacks_regs[stack_idx][R12_REGNUM] = r12; + stacks_regs[stack_idx][R13_REGNUM] = r13; + stacks_regs[stack_idx][R14_REGNUM] = r14; + stacks_regs[stack_idx][R15_REGNUM] = r15; + stacks_regs[stack_idx][RIP_REGNUM] = rip; + stacks_regs[stack_idx][EFLAGS_REGNUM] = rflags; + stacks_regs[stack_idx][CS_REGNUM] = cs; + stacks_regs[stack_idx][SS_REGNUM] = ss; + /* Skip stack 0 (main stack), start with index 1 */ + gdb_add_substack (stack_idx + 1); + stack_idx++; + } if (kvaddr) FREEBUF(pt_regs_buf); @@ -9236,7 +9276,7 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp) static int x86_64_get_current_task_reg(int regno, const char *name, - int size, void *value) + int size, void *value, int sid) { struct bt_info bt_info, bt_setup; struct task_context *tc; @@ -9256,6 +9296,17 @@ x86_64_get_current_task_reg(int regno, const char *name, if (!tc) return FALSE; + /* Non zero stack ID, use saved regs */ + if (sid && sid <= MAX_STACKS_NUM) { + switch (regno) { + case RAX_REGNUM ... SS_REGNUM: + memcpy(value, &stacks_regs[sid - 1][regno], size > 8 ? 
8 : size); + return TRUE; + default: + return FALSE; + } + } + /* * Task is active, grab CPU's registers */ -- 2.43.5 -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki