[Crash-utility] Re: [PATCH v4 1/5] ppc64: correct gdb passthroughs by implementing machdep->get_cpu_reg

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thank you for the update, Aditya.

On 12/15/23 15:59, Aditya Gupta wrote:
Currently, gdb passthroughs of 'bt', 'frame', 'up', 'down', 'info
locals' don't work. This is due to gdb not knowing the register values to
unwind the stack frames

Every gdb passthrough goes through `gdb_interface`. And then, gdb expects
`crash_target::fetch_registers` to give it the register values, which is
dependent on `machdep->get_cpu_reg` to read the register values for
specific architecture.

                                       ┌────────────────────────┐
            gdb passthrough (eg. "bt") │                        │
    crash   ─────────────────────────▶│                        │
                                       │      gdb_interface     │
                                       │                        │
                                       │                        │
                                       │ ┌────────────────────┐ │
                  fetch_registers      │ │                    │ │
crash_target◀────────────────────────┼─┤        gdb         │ │
             ─────────────────────────┼▶│                    │ │
               Registers (SP,NIP, etc.)│ │                    │ │
                                       │ │                    │ │
                                       │ └────────────────────┘ │
                                       └────────────────────────┘

Implement `machdep->get_cpu_reg` on PPC64, so that crash provides the
register values to gdb to unwind stack frames properly

With these changes, on powerpc, 'bt' command output in gdb mode, will look
like this:

     gdb> bt
     #0  0xc0000000002a53e8 in crash_setup_regs (oldregs=<optimized out>, newregs=0xc00000000486f8d8) at ./arch/powerpc/include/asm/kexec.h:69
     #1  __crash_kexec (regs=<optimized out>) at kernel/kexec_core.c:974
     #2  0xc000000000168918 in panic (fmt=<optimized out>) at kernel/panic.c:358
     #3  0xc000000000b735f8 in sysrq_handle_crash (key=<optimized out>) at drivers/tty/sysrq.c:155
     #4  0xc000000000b742cc in __handle_sysrq (key=key@entry=99, check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:602
     #5  0xc000000000b7506c in write_sysrq_trigger (file=<optimized out>, buf=<optimized out>, count=2, ppos=<optimized out>) at drivers/tty/sysrq.c:1163
     #6  0xc00000000069a7bc in pde_write (ppos=<optimized out>, count=<optimized out>, buf=<optimized out>, file=<optimized out>, pde=0xc000000009ed3a80) at fs/proc/inode.c:340
     #7  proc_reg_write (file=<optimized out>, buf=<optimized out>, count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:352
     #8  0xc0000000005b3bbc in vfs_write (file=file@entry=0xc00000009dda7d00, buf=buf@entry=0xebcfc7c6040 <error: Cannot access memory at address 0xebcfc7c6040>, count=count@entry=2, pos=pos@entry=0xc00000000486fda0) at fs/read_write.c:582

instead of earlier output without this patch:

     gdb> bt
     #0  <unavailable> in ?? ()
     Backtrace stopped: previous frame identical to this frame (corrupt stack?)

Also, 'get_dumpfile_regs' has been introduced to get registers from
multiple supported vmcore formats. Correspondingly a flag 'BT_NO_PRINT_REGS'
has been introduced to tell helper functions to get registers, to not
print registers with every call to backtrace in gdb.

  Note: This feature to support GDB unwinding doesn't support live debugging

Signed-off-by: Aditya Gupta <adityag@xxxxxxxxxxxxx>
---
  defs.h   | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
  kernel.c |  33 +++++++++++++++
  ppc64.c  | 108 ++++++++++++++++++++++++++++++++++++++++++++++--
  3 files changed, 262 insertions(+), 3 deletions(-)

diff --git a/defs.h b/defs.h
index 5218a94fe4a4..0c80de72e89e 100644
--- a/defs.h
+++ b/defs.h
@@ -6023,6 +6023,7 @@ int load_module_symbols_helper(char *);
  void unlink_module(struct load_module *);
  int check_specified_module_tree(char *, char *);
  int is_system_call(char *, ulong);
+void get_dumpfile_regs(struct bt_info*, ulong*, ulong*);
  void generic_dump_irq(int);
  void generic_get_irq_affinity(int);
  void generic_show_interrupts(int, ulong *);
@@ -6121,6 +6122,7 @@ ulong cpu_map_addr(const char *type);
  #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
  #define BT_OVERFLOW_STACK (0x8000000000000ULL)
  #define BT_SKIP_IDLE     (0x10000000000000ULL)
+#define BT_NO_PRINT_REGS (0x20000000000000ULL)
  #define BT_SYMBOL_OFFSET   (BT_SYMBOLIC_ARGS)
#define BT_REF_HEXVAL (0x1)
@@ -6662,6 +6664,8 @@ struct machine_specific {
void ppc64_init(int);
  void ppc64_dump_machdep_table(ulong);
+int ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
+		  void *value);

This is only invoked in ppc64 module, so it should be good to define this function in the ppc64.c, and use the keywork 'static'.

Given that, no need to add it in defs.h.

  #define display_idt_table() \
          error(FATAL, "-d option is not applicable to PowerPC architecture\n")
  #define KSYMS_START     (0x1)
@@ -7854,4 +7858,124 @@ enum x86_64_regnum {
          LAST_REGNUM
  };
+/*
+ * Register numbers to make crash_target->fetch_registers()
+ * ---> machdep->get_cpu_reg() work properly.
+ *
+ *  These register numbers and names are given according to output of
+ *  `rs6000_register_name`, because that is what was being used by
+ *  crash_target::fetch_registers in case of PPC64
+ */

Some register number constants are defined in the gdb/ppc-tdep.h:

enum {
  PPC_R0_REGNUM = 0,
  PPC_F0_REGNUM = 32,
  PPC_PC_REGNUM = 64,
...

}

Do you mean that other undefined registers won't be used in this case? Just a confirmation.

+enum ppc64_renum {
+	PPC64_R0_REGNUM = 0,
+	PPC64_R1_REGNUM,
+	PPC64_R2_REGNUM,
+	PPC64_R3_REGNUM,
+	PPC64_R4_REGNUM,
+	PPC64_R5_REGNUM,
+	PPC64_R6_REGNUM,
+	PPC64_R7_REGNUM,
+	PPC64_R8_REGNUM,
+	PPC64_R9_REGNUM,
+	PPC64_R10_REGNUM,
+	PPC64_R11_REGNUM,
+	PPC64_R12_REGNUM,
+	PPC64_R13_REGNUM,
+	PPC64_R14_REGNUM,
+	PPC64_R15_REGNUM,
+	PPC64_R16_REGNUM,
+	PPC64_R17_REGNUM,
+	PPC64_R18_REGNUM,
+	PPC64_R19_REGNUM,
+	PPC64_R20_REGNUM,
+	PPC64_R21_REGNUM,
+	PPC64_R22_REGNUM,
+	PPC64_R23_REGNUM,
+	PPC64_R24_REGNUM,
+	PPC64_R25_REGNUM,
+	PPC64_R26_REGNUM,
+	PPC64_R27_REGNUM,
+	PPC64_R28_REGNUM,
+	PPC64_R29_REGNUM,
+	PPC64_R30_REGNUM,
+	PPC64_R31_REGNUM,
+
+	PPC64_F0_REGNUM = 32,
+	PPC64_F1_REGNUM,
+	PPC64_F2_REGNUM,
+	PPC64_F3_REGNUM,
+	PPC64_F4_REGNUM,
+	PPC64_F5_REGNUM,
+	PPC64_F6_REGNUM,
+	PPC64_F7_REGNUM,
+	PPC64_F8_REGNUM,
+	PPC64_F9_REGNUM,
+	PPC64_F10_REGNUM,
+	PPC64_F11_REGNUM,
+	PPC64_F12_REGNUM,
+	PPC64_F13_REGNUM,
+	PPC64_F14_REGNUM,
+	PPC64_F15_REGNUM,
+	PPC64_F16_REGNUM,
+	PPC64_F17_REGNUM,
+	PPC64_F18_REGNUM,
+	PPC64_F19_REGNUM,
+	PPC64_F20_REGNUM,
+	PPC64_F21_REGNUM,
+	PPC64_F22_REGNUM,
+	PPC64_F23_REGNUM,
+	PPC64_F24_REGNUM,
+	PPC64_F25_REGNUM,
+	PPC64_F26_REGNUM,
+	PPC64_F27_REGNUM,
+	PPC64_F28_REGNUM,
+	PPC64_F29_REGNUM,
+	PPC64_F30_REGNUM,
+	PPC64_F31_REGNUM,
+
+	PPC64_PC_REGNUM = 64,
+	PPC64_MSR_REGNUM = 65,
+	PPC64_CR_REGNUM = 66,
+	PPC64_LR_REGNUM = 67,
+	PPC64_CTR_REGNUM = 68,
+	PPC64_XER_REGNUM = 69,
+	PPC64_FPSCR_REGNUM = 70,
+
+	PPC64_VR0_REGNUM = 106,
+	PPC64_VR1_REGNUM,
+	PPC64_VR2_REGNUM,
+	PPC64_VR3_REGNUM,
+	PPC64_VR4_REGNUM,
+	PPC64_VR5_REGNUM,
+	PPC64_VR6_REGNUM,
+	PPC64_VR7_REGNUM,
+	PPC64_VR8_REGNUM,
+	PPC64_VR9_REGNUM,
+	PPC64_VR10_REGNUM,
+	PPC64_VR11_REGNUM,
+	PPC64_VR12_REGNUM,
+	PPC64_VR13_REGNUM,
+	PPC64_VR14_REGNUM,
+	PPC64_VR15_REGNUM,
+	PPC64_VR16_REGNUM,
+	PPC64_VR17_REGNUM,
+	PPC64_VR18_REGNUM,
+	PPC64_VR19_REGNUM,
+	PPC64_VR20_REGNUM,
+	PPC64_VR21_REGNUM,
+	PPC64_VR22_REGNUM,
+	PPC64_VR23_REGNUM,
+	PPC64_VR24_REGNUM,
+	PPC64_VR25_REGNUM,
+	PPC64_VR26_REGNUM,
+	PPC64_VR27_REGNUM,
+	PPC64_VR28_REGNUM,
+	PPC64_VR29_REGNUM,
+	PPC64_VR30_REGNUM,
+	PPC64_VR31_REGNUM,
+
+	PPC64_VSCR_REGNUM = 138,
+	PPC64_VRSAVE_REGNU = 139
+};
+
  #endif /* !GDB_COMMON */
diff --git a/kernel.c b/kernel.c
index 6dcf414693e6..52b7ba09f390 100644
--- a/kernel.c
+++ b/kernel.c
@@ -3533,6 +3533,39 @@ get_lkcd_regs(struct bt_info *bt, ulong *eip, ulong *esp)
  	machdep->get_stack_frame(bt, eip, esp);
  }
+void
+get_dumpfile_regs(struct bt_info *bt, ulong *eip, ulong *esp)
+{
+	bt->flags |= BT_NO_PRINT_REGS;
+
+	if (NETDUMP_DUMPFILE())
+		get_netdump_regs(bt, eip, esp);
+	else if (KDUMP_DUMPFILE())
+		get_kdump_regs(bt, eip, esp);
+	else if (DISKDUMP_DUMPFILE())
+		get_diskdump_regs(bt, eip, esp);
+	else if (KVMDUMP_DUMPFILE())
+		get_kvmdump_regs(bt, eip, esp);
+	else if (LKCD_DUMPFILE())
+		get_lkcd_regs(bt, eip, esp);
+	else if (XENDUMP_DUMPFILE())
+		get_xendump_regs(bt, eip, esp);
+	else if (SADUMP_DUMPFILE())
+		get_sadump_regs(bt, eip, esp);
+	else if (VMSS_DUMPFILE())
+		get_vmware_vmss_regs(bt, eip, esp);
+	else if (REMOTE_PAUSED()) {
+		if (!is_task_active(bt->task) || !get_remote_regs(bt, eip, esp))
+			machdep->get_stack_frame(bt, eip, esp);
+	} else
+		machdep->get_stack_frame(bt, eip, esp);
+
+	bt->flags &= ~BT_NO_PRINT_REGS;
+
+	bt->instptr = *eip;
+	bt->stkptr = *esp;
+}
+
/*
   *  Store the head of the kernel module list for future use.
diff --git a/ppc64.c b/ppc64.c
index e8930a139e0d..f5b0b7241ea4 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -55,6 +55,8 @@ static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
  static char * ppc64_check_eframe(struct ppc64_pt_regs *);
  static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
  		struct bt_info *);
+int ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
+		  void *value);

As I mentioned before, we can add a 'static' keyword.

  static void parse_cmdline_args(void);
  static int ppc64_paca_percpu_offset_init(int);
  static void ppc64_init_cpu_info(void);
@@ -704,6 +706,8 @@ ppc64_init(int when)
  				error(FATAL, "cannot malloc hwirqstack buffer space.");
  		}
+ machdep->get_cpu_reg = ppc64_get_cpu_reg;
+
  		ppc64_init_paca_info();
if (!machdep->hz) {
@@ -2501,6 +2505,102 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs,
  	ppc64_print_nip_lr(regs, 1);
  }
+int
+ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
+		  void *value)
Ditto.
+{
+	struct bt_info bt_info, bt_setup;
+	struct task_context *tc;
+	struct ppc64_pt_regs *pt_regs;
+	ulong ip, sp;
+
+	if (LIVE()) {
+		/* doesn't support reading registers in live dump */
+		return FALSE;
+	}
+
+	/* Currently only handling registers available in pt_regs:
+	 *
+	 * 0-31:   r0-r31
+	 * 64:     pc/nip
+	 * 65:     msr
+	 *
+	 * 67:     lr
+	 * 68:     ctr
+	 */
+	switch (regno) {
+	case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+
+	case PPC64_PC_REGNUM:
+	case PPC64_MSR_REGNUM:
+	case PPC64_LR_REGNUM:
+	case PPC64_CTR_REGNUM:
+		break;
+
+	default:
+		// return false if we can't get that register

Could you please print the 'regno' value if the given register number is not handled for now?

if (CRASHDEBUG(1))

    error(WARNING, "unsupported register, regno=%d\n", regno);

+		return FALSE;
+	}
+
+	/* FIXME: Always setting the context to CURRENT_CONTEXT irrespective of whicher
+	 * thread we switched to, in gdb
+	 */

The above code comment can be removed, because the latter patch will change this behavior.

Or you could explain a little bit why to do this change in the code comment if needed.

+	tc = CURRENT_CONTEXT();
+	BZERO(&bt_setup, sizeof(struct bt_info));
+	clone_bt_info(&bt_setup, &bt_info, tc);
+	fill_stackbuf(&bt_info);
+
+	// reusing the get_dumpfile_regs function to get pt regs structure
+	get_dumpfile_regs(&bt_info, &sp, &ip);
+	pt_regs = (struct ppc64_pt_regs *)bt_info.machdep;
+
+	if (!pt_regs) {
+		error(WARNING, "pt_regs not available for cpu %d\n", cpu);
+		return FALSE;
+	}
+
+	switch (regno) {
+	case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+		if (size != sizeof(pt_regs->gpr[regno]))
+			return FALSE;  // size mismatch
+
+		memcpy(value, &pt_regs->gpr[regno], size);
+		return TRUE;
+
+	case PPC64_PC_REGNUM:
+		if (size != sizeof(pt_regs->nip))
+			return FALSE;  // size mismatch
+
+		memcpy(value, &pt_regs->nip, size);
+		return TRUE;
+
+	case PPC64_MSR_REGNUM:
+		if (size != sizeof(pt_regs->msr))
+			return FALSE;  // size mismatch
+
+		memcpy(value, &pt_regs->msr, size);
+		return TRUE;
+
+	case PPC64_LR_REGNUM:
+		if (size != sizeof(pt_regs->link))
+			return FALSE;  // size mismatch
+
+		memcpy(value, &pt_regs->link, size);
+		return TRUE;
+
+	case PPC64_CTR_REGNUM:
+		if (size != sizeof(pt_regs->ctr))
+			return FALSE;  // size mismatch
+
+		memcpy(value, &pt_regs->ctr, size);
+		return TRUE;
+	}
+
+	printf("error: %s: statement after switch case should be unreachable"
+			, __func__);
+	return FALSE;

I would suggest the following changes:

...
switch (regno) {
+	case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+		if (size != sizeof(pt_regs->gpr[regno]))
+			return FALSE;
+
+		memcpy(value, &pt_regs->gpr[regno], size);
+		break;
+       case...
+}
+return TRUE;
+}


The 'printf' and 'return FALSE' should not be reached. I guess this is debugging information, if 'yes', we can remove it this time.

Thanks
Lianbo

+}
+
  /*
   * For vmcore typically saved with KDump or FADump, get SP and IP values
   * from the saved ptregs.
@@ -2613,9 +2713,11 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
  		pt_regs = (struct ppc64_pt_regs *)bt->machdep;
  		ur_nip = pt_regs->nip;
  		ur_ksp = pt_regs->gpr[1];
-		/* Print the collected regs for panic task. */
-		ppc64_print_regs(pt_regs);
-		ppc64_print_nip_lr(pt_regs, 1);
+		if (!(bt->flags & BT_NO_PRINT_REGS)) {
+			/* Print the collected regs for panic task. */
+			ppc64_print_regs(pt_regs);
+			ppc64_print_nip_lr(pt_regs, 1);
+		}
  	} else if ((pc->flags & KDUMP) ||
  		   ((pc->flags & DISKDUMP) &&
  		    (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux