Re: cannot find stack info on ppc64le (call out to all IBM'ers on this list)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




----- Original Message -----
> Thanks so much for your reply, Dave.
> 
> On Mon, Jan 19, 2015 at 03:33:41PM -0500, Dave Anderson wrote:
> > 
> > Han,
> > 
> > This is much worse than I thought.  When you said "sometimes", you must
> > mean "all of the time" with respect to the active tasks?  Because that's
> > what I see here.
> Yes, it looks like bt fails to show the stack frames all of the time ...
> > 
> > So Han, can you find out who in IBM should be responsible for supporting
> > ppc64le in the crash utility?  Or is it you?
> > 
> No, I'm just a tester. I'll try to file a bug on this problem. It'll be
> assigned to the person who is in charge of supporting the crash utility.
> 
> Thanks again.

Han,

I've attached the patch that I've queued for crash-7.1.0:

  https://github.com/crash-utility/crash/commit/dc4ea682a21567dd9d093862ec54eb8529199c05

As I mentioned earlier, it fixes the default stack-search method for determining 
the backtrace starting hooks.

For example, without the patch, here is what I see on a ppc64le compressed kdump:
  
  crash> bt -a
  PID: 12674  TASK: c00000002cc08810  CPU: 0   COMMAND: "bash"
  
  PID: 0      TASK: c0000001ee020000  CPU: 1   COMMAND: "swapper/1"
  
  PID: 0      TASK: c0000001ee021370  CPU: 2   COMMAND: "swapper/2"
  
  PID: 0      TASK: c0000001ee0226e0  CPU: 3   COMMAND: "swapper/3"
  
  PID: 0      TASK: c0000001ee023a50  CPU: 4   COMMAND: "swapper/4"
  
  PID: 0      TASK: c0000001ee024dc0  CPU: 5   COMMAND: "swapper/5"
  
  PID: 0      TASK: c0000001ee026130  CPU: 6   COMMAND: "swapper/6"
  
  PID: 0      TASK: c0000001ee0274a0  CPU: 7   COMMAND: "swapper/7"
  crash> 

With the patch applied, it looks like this:

  crash> bt -a
  PID: 12674  TASK: c00000002cc08810  CPU: 0   COMMAND: "bash"
   #0 [c00000002925b6f0] crash_kexec at c00000000016e9d8
   #1 [c00000002925b8e0] die at c000000000021628
   #2 [c00000002925b980] bad_page_fault at c0000000000549e0
   #3 [c00000002925b9f0] handle_page_fault at c000000000009584
   Data Access error  [300] exception frame:
   R0:  c000000000562b00    R1:  c00000002925bce0    R2:  c0000000010ade68   
   R3:  0000000000000063    R4:  c000000001608018    R5:  c000000001618c88   
   R6:  00000000000057a8    R7:  c000000001259420    R8:  c000000000c6de68   
   R9:  0000000000000001    R10: 0000000000000000    R11: 000000000000013f   
   R12: c000000000561ec0    R13: c000000007e40000    R14: 0000000010139e60   
   R15: 0000000040000000    R16: 0000000000000000    R17: 0000000000000000   
   R18: 00000000101306fc    R19: 0000000010139dec    R20: 0000000010139df0   
   R21: 0000000010092c40    R22: 0000000000000000    R23: 0000000000000000   
   R24: 0000000000000001    R25: 0000000000000007    R26: c00000000100add8   
   R27: 0000000000000063    R28: c0000000014a9b98    R29: c00000000100b198   
   R30: c000000000fd8ba0    R31: 0000000000000002   
   NIP: c000000000561ee0    MSR: 8000000000009033    OR3: c000000000010a48
   CTR: c000000000561ec0    LR:  c000000000562b20    XER: 0000000020000000
   CCR: 0000000048222822    MQ:  0000000000000000    DAR: 0000000000000000
   DSISR: 0000000042000000     Syscall Result: 0000000000000000
   #4 [c00000002925bce0] sysrq_handle_crash at c000000000561ee0
   [Link Register ]  [c00000002925bce0] write_sysrq_trigger at c000000000562b20
   #5 [c00000002925bd90] proc_reg_write at c00000000037f4f4
   #6 [c00000002925bdd0] sys_write at c0000000002d6158
   #7 [c00000002925be30] syscall_exit at c00000000000a0fc
   syscall  [c00] exception frame:
   R0:  0000000000000004    R1:  00003fffd7e8e5c0    R2:  00003fff957e9768   
   R3:  0000000000000001    R4:  00003fff8ed30000    R5:  0000000000000002   
   R6:  0000000000000000    R7:  0000000000000000    R8:  0000000000000000   
   R9:  0000000000000000    R10: 0000000000000000    R11: 0000000000000000   
   R12: 0000000000000000    R13: 00003fff958eb450    R14: 0000000010139e60   
   R15: 0000000040000000    R16: 0000000000000000    R17: 0000000000000000   
   R18: 00000000101306fc    R19: 0000000010139dec    R20: 0000000010139df0   
   R21: 0000000010092c40    R22: 0000000000000000    R23: 0000000000000000   
   R24: 000000001013b5f0    R25: 00000000100f9a48    R26: 000001000bf78c50   
   R27: 0000000000000001    R28: 0000000000000002    R29: 00003fff957e12e8   
   R30: 00003fff8ed30000    R31: 0000000000000002   
   NIP: 00003fff95712960    MSR: 800000010280f033    OR3: 0000000000000001
   CTR: 0000000000000000    LR:  00003fff95696574    XER: 0000000000000000
   CCR: 0000000048222822    MQ:  0000000000000001    DAR: 00003fff95860308
   DSISR: 000000000a000000     Syscall Result: 0000000000000000
  
  PID: 0      TASK: c0000001ee020000  CPU: 1   COMMAND: "swapper/1"
   #0 [c0000001eff6b9f0] crash_ipi_callback at c000000000051440
   #1 [c0000001eff6ba30] smp_ipi_demux at c000000000048124
   #2 [c0000001eff6ba70] doorbell_exception at c000000000041d4c
   #3 [c0000001eff6baa0] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  c000000000718a8c    R1:  c0000001eff6bd90    R2:  c0000000010ade68   
   R3:  0000000000000a00    R4:  ffffffffffffffff    R5:  0000000000000001   
   R6:  0005eafdf829448b    R7:  00000000009e0000    R8:  0000000000000002   
   R9:  7fffffffffffffff    R10: 000000000000003b    R11: 000000000000001f   
   R12: 0000000000000000    R13: c000000007e40380   
   NIP: c000000000010880    MSR: 8000000100009033    OR3: c00000000071a95c
   CTR: c00000000071a7d0    LR:  c000000000010880    XER: 0000000000000000
   CCR: 0000000044000084    MQ:  0000000000000001    DAR: c0000000010f09d8
   DSISR: c0000000016482f8     Syscall Result: 0000000000000000
   #4 [c0000001eff6bd90] arch_local_irq_restore at c000000000010880  (unreliable)
   #5 [c0000001eff6bdb0] cpuidle_idle_call at c000000000718a8c
   #6 [c0000001eff6be10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001eff6be70] arch_cpu_idle at c000000000018118
   #8 [c0000001eff6bea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001eff6bf20] start_secondary at c000000000049080
  #10 [c0000001eff6bf90] start_secondary_prolog at c000000000009a6c
  
  PID: 0      TASK: c0000001ee021370  CPU: 2   COMMAND: "swapper/2"
   #0 [c0000001eff6f9f0] crash_ipi_callback at c000000000051440
   #1 [c0000001eff6fa30] smp_ipi_demux at c000000000048124
   #2 [c0000001eff6fa70] doorbell_exception at c000000000041d4c
   #3 [c0000001eff6faa0] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  c000000000718a8c    R1:  c0000001eff6fd90    R2:  c0000000010ade68   
   R3:  0000000000000a00    R4:  ffffffffffffffff    R5:  0000000000000001   
   R6:  0005eafdf8294338    R7:  0000000000a20000    R8:  0000000000000002   
   R9:  0000000000000000    R10: c00000000001ec00    R11: 0000000000000079   
   R12: 0000000000000000    R13: c000000007e40700   
   NIP: c000000000010880    MSR: 8000000100009033    OR3: c000000000147548
   CTR: c00000000001ec00    LR:  c000000000010880    XER: 0000000000000000
   CCR: 0000000044000084    MQ:  0000000000000001    DAR: c0000000010f09d8
   DSISR: c0000000016882f8     Syscall Result: 0000000000000000
   #4 [c0000001eff6fd90] arch_local_irq_restore at c000000000010880  (unreliable)
   #5 [c0000001eff6fdb0] cpuidle_idle_call at c000000000718a8c
   #6 [c0000001eff6fe10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001eff6fe70] arch_cpu_idle at c000000000018118
   #8 [c0000001eff6fea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001eff6ff20] start_secondary at c000000000049080
  #10 [c0000001eff6ff90] start_secondary_prolog at c000000000009a6c
  
  PID: 0      TASK: c0000001ee0226e0  CPU: 3   COMMAND: "swapper/3"
   #0 [c0000001eff739f0] crash_ipi_callback at c000000000051440
   #1 [c0000001eff73a30] smp_ipi_demux at c000000000048124
   #2 [c0000001eff73a70] doorbell_exception at c000000000041d4c
   #3 [c0000001eff73aa0] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  c000000000718a8c    R1:  c0000001eff73d90    R2:  c0000000010ade68   
   R3:  0000000000000a00    R4:  ffffffffffffffff    R5:  0000000000000001   
   R6:  0005eafdf82943ca    R7:  0000000000a60000    R8:  0000000000000002   
   R9:  0000000000000020    R10: c0000000010ede68    R11: 0000000000000144   
   R12: 00003fff8605280c    R13: 00003fff7f806900   
   NIP: c000000000010880    MSR: 8000000100009033    OR3: c000000000718a40
   CTR: 0000000000000000    LR:  c000000000010880    XER: 0000000000000000
   CCR: 0000000044000084    MQ:  0000000000000001    DAR: c0000000016c0380
   DSISR: c0000001eff73ca0     Syscall Result: 0000000000000000
   #4 [c0000001eff73d90] arch_local_irq_restore at c000000000010880  (unreliable)
   #5 [c0000001eff73db0] cpuidle_idle_call at c000000000718a8c
   #6 [c0000001eff73e10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001eff73e70] arch_cpu_idle at c000000000018118
   #8 [c0000001eff73ea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001eff73f20] start_secondary at c000000000049080
  #10 [c0000001eff73f90] start_secondary_prolog at c000000000009a6c
  
  PID: 0      TASK: c0000001ee023a50  CPU: 4   COMMAND: "swapper/4"
   #0 [c0000001eff779a0] crash_ipi_callback at c000000000051440
   #1 [c0000001eff779e0] smp_ipi_demux at c000000000048124
   #2 [c0000001eff77a20] doorbell_exception at c000000000041d4c
   #3 [c0000001eff77a50] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  0000000024000084    R1:  c0000001eff77d40    R2:  c0000000010ade68   
   R3:  0000000000000000    R4:  c00000000101b980    R5:  0000000000000000   
   R6:  001bc0f45c000000    R7:  0000000000000000    R8:  0000000000000000   
   R9:  c000000007c40c00    R10: 0000000000000001    R11: 0005eafdf7a13a2f   
   R12: 0000000000000000    R13: c000000007e40e00   
   NIP: c000000000086ae4    MSR: 8000000100009033    OR3: c00000000071b334
   CTR: c0000000000aca7c    LR:  c00000000071b338    XER: 0000000000000000
   CCR: 0000000024000084    MQ:  0000000000000001    DAR: 0000000000000001
   DSISR: c0000001eff77c60     Syscall Result: 0000000000000000
   #4 [c0000001eff77d40] plpar_hcall_norets at c000000000086ae4
   [Link Register ]  [c0000001eff77d40] shared_cede_loop at c00000000071b338
   #5 [c0000001eff77db0] cpuidle_idle_call at c000000000718a70
   #6 [c0000001eff77e10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001eff77e70] arch_cpu_idle at c000000000018118
   #8 [c0000001eff77ea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001eff77f20] start_secondary at c000000000049080
  #10 [c0000001eff77f90] start_secondary_prolog at c000000000009a6c
  
  PID: 0      TASK: c0000001ee024dc0  CPU: 5   COMMAND: "swapper/5"
   #0 [c0000001eff7b9a0] crash_ipi_callback at c000000000051440
   #1 [c0000001eff7b9e0] smp_ipi_demux at c000000000048124
   #2 [c0000001eff7ba20] doorbell_exception at c000000000041d4c
   #3 [c0000001eff7ba50] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  0000000024000084    R1:  c0000001eff7bd40    R2:  c0000000010ade68   
   R3:  0000000000000000    R4:  c00000000101b980    R5:  0000000000000000   
   R6:  001bc0f45c000000    R7:  0000000000000000    R8:  0000000000000000   
   R9:  c000000007c41000    R10: 0000000000000001    R11: 0005eafdf7a13a2f   
   R12: 0000000000000000    R13: c000000007e41180   
   NIP: c000000000086ae4    MSR: 8000000100009033    OR3: c00000000071acec
   CTR: c0000000000aca7c    LR:  c00000000071b338    XER: 0000000000000000
   CCR: 0000000024000084    MQ:  0000000000000001    DAR: 0000000000000001
   DSISR: c0000001eff7bc60     Syscall Result: 0000000000000000
   #4 [c0000001eff7bd40] plpar_hcall_norets at c000000000086ae4
   [Link Register ]  [c0000001eff7bd40] shared_cede_loop at c00000000071b338
   #5 [c0000001eff7bdb0] cpuidle_idle_call at c000000000718a70
   #6 [c0000001eff7be10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001eff7be70] arch_cpu_idle at c000000000018118
   #8 [c0000001eff7bea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001eff7bf20] start_secondary at c000000000049080
  #10 [c0000001eff7bf90] start_secondary_prolog at c000000000009a6c
  
  PID: 0      TASK: c0000001ee026130  CPU: 6   COMMAND: "swapper/6"
   #0 [c0000001eff7f9a0] crash_ipi_callback at c000000000051440
   #1 [c0000001eff7f9e0] smp_ipi_demux at c000000000048124
   #2 [c0000001eff7fa20] doorbell_exception at c000000000041d4c
   #3 [c0000001eff7fa50] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  0000000024000084    R1:  c0000001eff7fd40    R2:  c0000000010ade68   
   R3:  0000000000000000    R4:  c00000000101b980    R5:  0000000000000000   
   R6:  001bc0f45c000000    R7:  0000000000000000    R8:  0000000000000000   
   R9:  c000000007c41400    R10: 0000000000000001    R11: 0005eafdf7a13a2f   
   R12: 0000000000000000    R13: c000000007e41500   
   NIP: c000000000086ae4    MSR: 8000000100009033    OR3: c00000000047a0b4
   CTR: c0000000000aca7c    LR:  c00000000071b338    XER: 0000000000000000
   CCR: 0000000024000084    MQ:  0000000000000001    DAR: 0000000000000001
   DSISR: c0000001eff7fc60     Syscall Result: 0000000000000000
   #4 [c0000001eff7fd40] plpar_hcall_norets at c000000000086ae4
   [Link Register ]  [c0000001eff7fd40] shared_cede_loop at c00000000071b338
   #5 [c0000001eff7fdb0] cpuidle_idle_call at c000000000718a70
   #6 [c0000001eff7fe10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001eff7fe70] arch_cpu_idle at c000000000018118
   #8 [c0000001eff7fea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001eff7ff20] start_secondary at c000000000049080
  #10 [c0000001eff7ff90] start_secondary_prolog at c000000000009a6c
  
  PID: 0      TASK: c0000001ee0274a0  CPU: 7   COMMAND: "swapper/7"
   #0 [c0000001ee0839a0] crash_ipi_callback at c000000000051440
   #1 [c0000001ee0839e0] smp_ipi_demux at c000000000048124
   #2 [c0000001ee083a20] doorbell_exception at c000000000041d4c
   #3 [c0000001ee083a50] doorbell_super_common at c000000000002714
   reserved  [a01] exception frame:
   R0:  0000000024000084    R1:  c0000001ee083d40    R2:  c0000000010ade68   
   R3:  0000000000000000    R4:  c00000000101b980    R5:  0000000000000000   
   R6:  001bc0f45c000000    R7:  0000000000000000    R8:  0000000000000000   
   R9:  c000000007c41800    R10: 0000000000000001    R11: 0005eafdf7a13a2f   
   R12: 0000000000000000    R13: c000000007e41880   
   NIP: c000000000086ae4    MSR: 8000000100009033    OR3: c0000000001475dc
   CTR: c0000000000aca7c    LR:  c00000000071b338    XER: 0000000000000000
   CCR: 0000000024000084    MQ:  0000000000000001    DAR: 0000000000000001
   DSISR: c0000001ee083c60     Syscall Result: 0000000000000000
   #4 [c0000001ee083d40] plpar_hcall_norets at c000000000086ae4
   [Link Register ]  [c0000001ee083d40] shared_cede_loop at c00000000071b338
   #5 [c0000001ee083db0] cpuidle_idle_call at c000000000718a70
   #6 [c0000001ee083e10] pseries_lpar_idle at c0000000000894d8
   #7 [c0000001ee083e70] arch_cpu_idle at c000000000018118
   #8 [c0000001ee083ea0] cpu_startup_entry at c0000000001455a0
   #9 [c0000001ee083f20] start_secondary at c000000000049080
  #10 [c0000001ee083f90] start_secondary_prolog at c000000000009a6c
  crash>

Thanks,
  Dave

commit dc4ea682a21567dd9d093862ec54eb8529199c05
Author: Dave Anderson <anderson@xxxxxxxxxx>
Date:   Tue Jan 20 14:38:48 2015 -0500

    Fix for the PPC64 "bt" command on both big-endian and little-endian
    architectures.  Without the patch, backtraces of the active tasks
    may be "empty" on little-endian machines, or show a one-liner of
    the form: "#0 [c0000005f4db7a60] (null) at 501  (unreliable)" on
    big-endian machines.
    (anderson@xxxxxxxxxx)

diff --git a/ppc64.c b/ppc64.c
index a73200d..21588f5 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -1,7 +1,7 @@
 /* ppc64.c -- core analysis suite
  *
- * Copyright (C) 2004-2014 David Anderson
- * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 David Anderson
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
  * Copyright (C) 2004, 2006 Haren Myneni, IBM Corporation
  *
  * This program is free software; you can redistribute it and/or modify
@@ -1902,8 +1902,8 @@ ppc64_print_regs(struct ppc64_pt_regs *regs)
 	int i;
 
         /* print out the gprs... */
-        for(i=0; i<32; i++) {
-                if(!(i % 3))
+        for (i=0; i<32; i++) {
+                if (i && !(i % 3))
                         fprintf(fp, "\n");
 
                 fprintf(fp, " R%d:%s %016lx   ", i,
@@ -1944,9 +1944,8 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs,
 	if (BT_REFERENCE_CHECK(bt))
 		return;
 
-	fprintf(fp, " %s  [%lx] exception frame:", efrm_str, regs->trap);
+	fprintf(fp, " %s  [%lx] exception frame:\n", efrm_str, regs->trap);
 	ppc64_print_regs(regs);
-	fprintf(fp, "\n");
 }
 
 /*
@@ -2000,8 +1999,6 @@ ppc64_kdump_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
 		fprintf(fp, " LR  [%016lx] %s\n", pt_regs->link,
 			closest_symbol(pt_regs->link));
 
-	fprintf(fp, "\n");
-
 	return TRUE;
 }
 
@@ -2012,7 +2009,7 @@ static int
 ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
 {
 	int panic_task;
-	int i, panic;
+	int i;
 	char *sym;
 	ulong *up;
 	struct bt_info bt_local, *bt;
@@ -2096,10 +2093,8 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
 			fprintf(fp, "Could not find SP for task %0lx\n",
 				bt->task);
 		}
-		return TRUE;
 	}
 
-	panic = FALSE;
 	/*
 	 * Check the process stack first. We are scanning stack for only
 	 * panic task. Even though we have dumping CPU's regs, we will be
@@ -2116,8 +2111,12 @@ retry:
                 if (STREQ(sym, ".netconsole_netdump") || 
 			STREQ(sym, ".netpoll_start_netdump") ||
 		 	STREQ(sym, ".start_disk_dump") ||
+		 	STREQ(sym, "crash_kexec") ||
+			STREQ(sym, "crash_fadump") ||
+		 	STREQ(sym, "crash_ipi_callback") ||
 		 	STREQ(sym, ".crash_kexec") ||
 			STREQ(sym, ".crash_fadump") ||
+		 	STREQ(sym, ".crash_ipi_callback") ||
 			STREQ(sym, ".disk_dump")) {
                         *nip = *up;
                         *ksp = bt->stackbase + 
@@ -2126,9 +2125,6 @@ retry:
                 }
 	}
 
-	if (panic) 
-		return TRUE;
-	
 	bt->flags &= ~(BT_HARDIRQ|BT_SOFTIRQ);
 
 	if (check_hardirq &&

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux