Handle the NT_PRSTATUS lost for the "bt" command

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The purpose of this patch set is to make the "bt" command work for diskdump
files in which NT_PRSTATUS notes could not be saved because an IPI was lost.
I think an IPI can be lost during a panic when the system has crashed badly.

I noticed that "bt" failed in my ppc environment
when the NT_PRSTATUS notes of some CPUs were lost during IPI delivery.
So I made the CPU-to-prstatus mapping in diskdump more accurate
by checking the validity of each CPU's crash_notes entry.

I reproduced this problem by patching the kernel as follows:
- kernel/kexec.c
void crash_save_cpu(struct pt_regs *regs, int cpu)
{
+        if (current->pid == 0)
+                /* this cpu was idle; nothing to capture */
+                return;

This looks like a terrible and impractical test case, but I actually
found this code in the kernel of the distro I use.
I couldn't reproduce an actual lost-IPI case, so I conveniently used this
as a stand-in for the situation where the IPI cannot be delivered to other CPUs.

=> Taking diskdump by sysrq+c and makedumpfile.

crash> help -D | grep notes
  num_prstatus_notes: 1
           notes_buf: 10ba91a8
            notes[0]: 10ba91a8
crash> help -k | grep cpus
          cpus: 8
 cpus_override: (null)
crash> bt
PID: 1001   TASK: ea62b000  CPU: 2   COMMAND: "bash"
Segmentation fault

Since the seven idle CPUs did not save an NT_PRSTATUS note,
crash could not handle CPU#2's note, which was mapped as CPU#0's.

With this patch, crash works with the correct CPU-to-prstatus mapping.

WARNING: catch lost crash_notes at cpu#0
WARNING: catch lost crash_notes at cpu#1
WARNING: catch lost crash_notes at cpu#3
WARNING: catch lost crash_notes at cpu#4
WARNING: catch lost crash_notes at cpu#5
WARNING: catch lost crash_notes at cpu#6
WARNING: catch lost crash_notes at cpu#7
crash.fix> help -D | grep notes
  num_prstatus_notes: 1
           notes_buf: 107a3378
            notes[2]: 107a3378
crash.fix> help -k | grep cpus
          cpus: 8
 cpus_override: (null)
crash.fix> bt
PID: 1001   TASK: ea62b000  CPU: 2   COMMAND: "bash"

R0:  00000001   R1:  eb793e60   R2:  ea62b000   R3:  00000063
R4:  00000000   R5:  ffffffff   R6:  c043ba2c   R7:  00000000
R8:  00008000   R9:  00000000   R10: 00000000   R11: eb793e70
R12: 28242444   R13: 100b8448   R14: 100b07b8   R15: 100b0894
R16: 00000000   R17: 00000000   R18: 00000000   R19: 1006d270
R20: 00000000   R21: 100f0430   R22: 00000000   R23: 00000001
R24: c08f1ac8   R25: 00029002   R26: c08f1bac   R27: c08d0000
R28: 00000000   R29: c09ada48   R30: 00000063   R31: eb793e60
NIP: c0423378   MSR: 00021002   OR3: c09ada48   CTR: c0423344
LR:  c0423d8c   XER: 00000000   CCR: 28242444   MQ:  00008000
DAR: 00000000 DSISR: 00800000        Syscall Result: eb793e60
 NIP [00000000c0423378] sysrq_handle_crash
 LR  [00000000c0423d8c] __handle_sysrq

 #0 [eb793e60] sysrq_handle_crash at c0423378
  : snip

Thanks,
Toshi

Date: Mon, 18 Jun 2012 11:56:35 +0900
Subject: [PATCH 1/4] use calloc() for nt_prstatus_percpu

get_diskdump_regs_ppc() and get_netdump_regs_ppc() expect the
nt_prstatus_percpu[] entry to be NULL if the note is not contained in the
dumpfile (e.g. crash_save_cpu() could not run from the IPI for some reason).

Use calloc() so that unset entries are NULL, avoiding an unexpected segfault.

crash> bt
PID: 1054   TASK: eaf5ec00  CPU: 1   COMMAND: "bash"
Segmentation fault

crash> bt
PID: 1054   TASK: eaf5ec00  CPU: 1   COMMAND: "bash"
bt: cannot determine NT_PRSTATUS ELF note for panic task: eaf5ec00

Signed-off-by: Toshikazu Nakayama <nakayama.ts@xxxxxxxxxxxxxx>
---
 diskdump.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/diskdump.c b/diskdump.c
index e3f04e8..8accd3c 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -526,7 +526,7 @@ restart:
 			error(FATAL, "compressed kdump: cannot malloc notes"
 				" buffer\n");
 
-		if ((dd->nt_prstatus_percpu = malloc(NR_CPUS * sizeof(void*))) == NULL)
+		if ((dd->nt_prstatus_percpu = calloc(NR_CPUS, sizeof(void*))) == NULL)
 			error(FATAL, "compressed kdump: cannot malloc pointer"
 				" to NT_PRSTATUS notes\n");
 
-- 
1.7.0.4


Date: Mon, 18 Jun 2012 13:38:13 +0900
Subject: [PATCH 2/4] handle cpus which lost crash_notes.

The kexec crash_save_cpu() is called from an IPI, which may time out
for various reasons, such as a crash with interrupts off.
If a note is lost, nt_prstatus_percpu[] is not mapped correctly and the
backtrace command fails because of the invalid mapping.

[example]
                      CPU#0   CPU#1   CPU#2   CPU#3
---------------------------------------------------
crash_notes[]         saved   lost    lost    saved  [saving dump file]
---------------------------------------------------
nt_prstatus_percpu[]  note#0  note#3  null    null   [crash environs]

=> bt for a CPU#1 task uses CPU#3's note, and bt for CPU#3 does not work.

Signed-off-by: Toshikazu Nakayama <nakayama.ts@xxxxxxxxxxxxxx>
---
 diskdump.c |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/diskdump.c b/diskdump.c
index 8accd3c..daae52a 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -90,6 +90,60 @@ int dumpfile_is_split(void)
 	return KDUMP_SPLIT();
 }
 
+static int
+cpu_was_lost_crash_note(int cpu)
+{
+	int ret;
+	ulong crash_notes_ptr;
+	char *buf, *name;
+
+	ret = FALSE;
+
+	if (cpu < 0 || cpu > NR_CPUS)
+		error(FATAL, "cpu#%d is out of range\n", cpu);
+
+	if (!symbol_exists("crash_notes") ||
+	    !STRUCT_EXISTS("note_buf_t") || !STRUCT_EXISTS("elf_prstatus"))
+		goto out;
+
+	readmem(symbol_value("crash_notes"), KVADDR, &crash_notes_ptr,
+		sizeof(ulong), "crash_notes", FAULT_ON_ERROR);
+	if (!crash_notes_ptr)
+		goto out;
+
+	buf = GETBUF(STRUCT_SIZE("note_buf_t"));
+	if ((kt->flags & SMP) && (kt->flags &PER_CPU_OFF))
+		crash_notes_ptr += kt->__per_cpu_offset[cpu];
+	readmem(crash_notes_ptr, KVADDR, buf, STRUCT_SIZE("note_buf_t"),
+		"cpu crash_notes", FAULT_ON_ERROR);
+	if (BITS64()) {
+		Elf64_Nhdr *note64;
+
+		note64 = (Elf64_Nhdr *)buf;
+		name = (char *)(note64 + 1);
+		if (note64->n_type != NT_PRSTATUS ||
+		    note64->n_namesz != strlen("CORE") + 1 ||
+		    strncmp(name, "CORE", note64->n_namesz) ||
+		    note64->n_descsz != STRUCT_SIZE("elf_prstatus"))
+			ret = TRUE;
+	} else {
+		Elf32_Nhdr *note32;
+
+		note32 = (Elf32_Nhdr *)buf;
+		name = (char *)(note32 + 1);
+		if (note32->n_type != NT_PRSTATUS ||
+		    note32->n_namesz != strlen("CORE") + 1 ||
+		    strncmp(name, "CORE", note32->n_namesz) ||
+		    note32->n_descsz != STRUCT_SIZE("elf_prstatus"))
+			ret = TRUE;
+	}
+	FREEBUF(buf);
+out:
+	if (ret)
+		error(WARNING, "catch lost crash_notes at cpu#%d\n", cpu);
+	return ret;
+}
+
 void
 map_cpus_to_prstatus_kdump_cmprs(void)
 {
@@ -97,7 +151,8 @@ map_cpus_to_prstatus_kdump_cmprs(void)
 	int online, i, j, nrcpus;
 	size_t size;
 
-	if (!(online = get_cpus_online()) || (online == kt->cpus))
+	if (!(online = get_cpus_online()) ||
+	    (online == kt->cpus && online == dd->num_prstatus_notes))
 		return;
 
 	if (CRASHDEBUG(1))
@@ -117,7 +172,7 @@ map_cpus_to_prstatus_kdump_cmprs(void)
 	nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
 
 	for (i = 0, j = 0; i < nrcpus; i++) {
-		if (in_cpu_map(ONLINE, i))
+		if (in_cpu_map(ONLINE, i) && !cpu_was_lost_crash_note(i))
 			dd->nt_prstatus_percpu[i] = nt_ptr[j++];
 	}
 
@@ -1270,7 +1325,7 @@ dump_nt_prstatus_offset(FILE *fp)
 int
 __diskdump_memory_dump(FILE *fp)
 {
-	int i, others, dump_level;
+	int i, j, others, dump_level;
 	struct disk_dump_header *dh;
 	struct disk_dump_sub_header *dsh;
 	struct kdump_sub_header *kdsh;
@@ -1460,9 +1515,12 @@ __diskdump_memory_dump(FILE *fp)
 				dd->num_prstatus_notes);
 			fprintf(fp, "           notes_buf: %lx\n",
 				(ulong)dd->notes_buf);
-			for (i = 0; i < dd->num_prstatus_notes; i++) {
+			for (i = 0, j = 0; j < dd->num_prstatus_notes; i++) {
+				if (dd->nt_prstatus_percpu[i] == NULL)
+					continue;
 				fprintf(fp, "            notes[%d]: %lx\n",
 					i, (ulong)dd->nt_prstatus_percpu[i]);
+				j++;
 			}
 			dump_nt_prstatus_offset(fp);
 		}
-- 
1.7.0.4


Date: Mon, 18 Jun 2012 16:33:54 +0900
Subject: [PATCH 3/4] ppc: use kt->cpus instead of dd->num_prstatus_notes

Because dd->num_prstatus_notes may be reduced by lost notes, and
this condition is meant to check whether the system is SMP,
it is better to use kt->cpus.

With this change, "bt" for active tasks on such CPUs also reports
"cannot determine NT_PRSTATUS ELF note ...".

Signed-off-by: Toshikazu Nakayama <nakayama.ts@xxxxxxxxxxxxxx>
---
 diskdump.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/diskdump.c b/diskdump.c
index daae52a..3de1032 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -1075,7 +1075,7 @@ get_diskdump_regs_ppc(struct bt_info *bt, ulong *eip, ulong *esp)
 
 	if (KDUMP_CMPRS_VALID() &&
 		(bt->task == tt->panic_task || 
-		(is_task_active(bt->task) && dd->num_prstatus_notes > 1))) {
+		(is_task_active(bt->task) && (kt->cpus > 1)))) {
 		note  = (Elf32_Nhdr*) dd->nt_prstatus_percpu[bt->tc->processor];
 		if (!note)
 			error(FATAL,
@@ -1085,7 +1085,7 @@ get_diskdump_regs_ppc(struct bt_info *bt, ulong *eip, ulong *esp)
 					"panic" : "active", bt->task);
 		len = sizeof(Elf32_Nhdr);
 		len = roundup(len + note->n_namesz, 4);
-		 bt->machdep = (void *)((char *)note + len +
+		bt->machdep = (void *)((char *)note + len +
 			MEMBER_OFFSET("elf_prstatus", "pr_reg"));
 	}
 
-- 
1.7.0.4


Date: Mon, 18 Jun 2012 17:14:33 +0900
Subject: [PATCH 4/4] move the common structures from machdep to kernel.c

Since the note_buf_t and elf_prstatus structures are common to
other architectures such as ppc, ppc64, ...,
kernel_init() is a better place to initialize them than machdep_init().

Signed-off-by: Toshikazu Nakayama <nakayama.ts@xxxxxxxxxxxxxx>
---
 arm.c      |    8 --------
 diskdump.c |   10 +++++-----
 kernel.c   |    3 +++
 x86.c      |    2 --
 x86_64.c   |    2 --
 5 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/arm.c b/arm.c
index 1065c9d..e0d9a3c 100644
--- a/arm.c
+++ b/arm.c
@@ -282,14 +282,6 @@ arm_init(int when)
 		MEMBER_OFFSET_INIT(thread_info_cpu_context,
 			"thread_info", "cpu_context");
 
-		/*
-		 * We need to have information about note_buf_t which is used to
-		 * hold ELF note containing registers and status of the thread
-		 * that panic'd.
-		 */
-		STRUCT_SIZE_INIT(note_buf, "note_buf_t");
-
-		STRUCT_SIZE_INIT(elf_prstatus, "elf_prstatus");
 		MEMBER_OFFSET_INIT(elf_prstatus_pr_pid, "elf_prstatus",
 				   "pr_pid");
 		MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus",
diff --git a/diskdump.c b/diskdump.c
index 3de1032..4bf8560 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -103,7 +103,7 @@ cpu_was_lost_crash_note(int cpu)
 		error(FATAL, "cpu#%d is out of range\n", cpu);
 
 	if (!symbol_exists("crash_notes") ||
-	    !STRUCT_EXISTS("note_buf_t") || !STRUCT_EXISTS("elf_prstatus"))
+	    !VALID_STRUCT(note_buf) || !VALID_STRUCT(elf_prstatus))
 		goto out;
 
 	readmem(symbol_value("crash_notes"), KVADDR, &crash_notes_ptr,
@@ -111,10 +111,10 @@ cpu_was_lost_crash_note(int cpu)
 	if (!crash_notes_ptr)
 		goto out;
 
-	buf = GETBUF(STRUCT_SIZE("note_buf_t"));
+	buf = GETBUF(SIZE(note_buf));
 	if ((kt->flags & SMP) && (kt->flags &PER_CPU_OFF))
 		crash_notes_ptr += kt->__per_cpu_offset[cpu];
-	readmem(crash_notes_ptr, KVADDR, buf, STRUCT_SIZE("note_buf_t"),
+	readmem(crash_notes_ptr, KVADDR, buf, SIZE(note_buf),
 		"cpu crash_notes", FAULT_ON_ERROR);
 	if (BITS64()) {
 		Elf64_Nhdr *note64;
@@ -124,7 +124,7 @@ cpu_was_lost_crash_note(int cpu)
 		if (note64->n_type != NT_PRSTATUS ||
 		    note64->n_namesz != strlen("CORE") + 1 ||
 		    strncmp(name, "CORE", note64->n_namesz) ||
-		    note64->n_descsz != STRUCT_SIZE("elf_prstatus"))
+		    note64->n_descsz != SIZE(elf_prstatus))
 			ret = TRUE;
 	} else {
 		Elf32_Nhdr *note32;
@@ -134,7 +134,7 @@ cpu_was_lost_crash_note(int cpu)
 		if (note32->n_type != NT_PRSTATUS ||
 		    note32->n_namesz != strlen("CORE") + 1 ||
 		    strncmp(name, "CORE", note32->n_namesz) ||
-		    note32->n_descsz != STRUCT_SIZE("elf_prstatus"))
+		    note32->n_descsz != SIZE(elf_prstatus))
 			ret = TRUE;
 	}
 	FREEBUF(buf);
diff --git a/kernel.c b/kernel.c
index e11d1b7..3690969 100755
--- a/kernel.c
+++ b/kernel.c
@@ -609,6 +609,9 @@ kernel_init()
 
 	STRUCT_SIZE_INIT(mem_section, "mem_section");
 
+	STRUCT_SIZE_INIT(note_buf, "note_buf_t");
+	STRUCT_SIZE_INIT(elf_prstatus, "elf_prstatus");
+
 	BUG_bytes_init();
 	
 	kt->flags &= ~PRE_KERNEL_INIT;
diff --git a/x86.c b/x86.c
index 06ccb92..337c7ce 100755
--- a/x86.c
+++ b/x86.c
@@ -2014,8 +2014,6 @@ x86_init(int when)
 		else
 			machdep->machspec->page_protnone = _PAGE_PSE;
 
-		STRUCT_SIZE_INIT(note_buf, "note_buf_t");
-		STRUCT_SIZE_INIT(elf_prstatus, "elf_prstatus");
 		MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus",
 				   "pr_reg");
 		STRUCT_SIZE_INIT(percpu_data, "percpu_data");
diff --git a/x86_64.c b/x86_64.c
index b6c32e9..9c82032 100755
--- a/x86_64.c
+++ b/x86_64.c
@@ -533,8 +533,6 @@ x86_64_init(int when)
 		else
 			machdep->machspec->page_protnone = _PAGE_PSE;
 
-		STRUCT_SIZE_INIT(note_buf, "note_buf_t");
-		STRUCT_SIZE_INIT(elf_prstatus, "elf_prstatus");
 		MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus",
 				   "pr_reg");
 		STRUCT_SIZE_INIT(percpu_data, "percpu_data");
-- 
1.7.0.4


--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux