[PATCH] Add support for 'foreign' page sizes in kdump dumps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This affects only ppc64 and ia64, since those are the only architectures
whose page size is configurable at runtime by the system (which means
at boot by the kernel).

It also only affects dumps of the formats diskdump (which includes kdump
compressed dumps created by makedumpfile) and netdump (which includes kdump
ELF dumps copied from /proc/vmcore without any filtering applied and created
by makedumpfile with the -E option).

The patch reads the page size from the diskdump header or from the VMCOREINFO
in case of netdump (if it's there). For ia64 it also evaluates the page size
of the zero page to *change* the page size. In the past it didn't change the
page size; it only printed an error.

The patch has been tested on ppc64 (4k vs. 64k), ia64 (16k vs. 64k) and x86-64
(always 4k). It has been tested for compilation on i386, x86-64, ia64, PPC,
ppc64, s390 and s390x. Everything on a SLES 11/openSUSE 11.1 code base.


Signed-off-by: Bernhard Walle <bwalle@xxxxxxx>


4 files changed, 182 insertions(+), 92 deletions(-)
diskdump.c |   26 ++++------
ia64.c     |  146 ++++++++++++++++++++++++++++--------------------------------
netdump.c  |  100 +++++++++++++++++++++++++++++++++++++++++
netdump.h  |    2 


This affects only ppc64 and ia64, since those are the only architectures
whose page size is configurable at runtime by the system (which means
at boot by the kernel).

It also only affects dumps of the formats diskdump (which includes kdump
compressed dumps created by makedumpfile) and netdump (which includes kdump
ELF dumps copied from /proc/vmcore without any filtering applied and created
by makedumpfile with the -E option).

The patch reads the page size from the diskdump header or from the VMCOREINFO
in case of netdump (if it's there). For ia64 it also evaluates the page size
of the zero page to *change* the page size. In the past it didn't change the
page size; it only printed an error.

The patch has been tested on ppc64 (4k vs. 64k), ia64 (16k vs. 64k) and x86-64
(always 4k). It has been tested for compilation on i386, x86-64, ia64, PPC,
ppc64, s390 and s390x. Everything on a SLES 11/openSUSE 11.1 code base.


Signed-off-by: Bernhard Walle <bwalle@xxxxxxx>

diff --git a/diskdump.c b/diskdump.c
--- a/diskdump.c
+++ b/diskdump.c
@@ -107,7 +107,7 @@
 	struct disk_dump_sub_header *sub_header = NULL;
 	struct kdump_sub_header *sub_header_kdump = NULL;
 	int bitmap_len;
-	const int block_size = (int)sysconf(_SC_PAGESIZE);
+	int block_size = (int)sysconf(_SC_PAGESIZE);
 	off_t offset;
 	const off_t failed = (off_t)-1;
 	ulong pfn;
@@ -116,7 +116,8 @@
 	if (block_size < 0)
 		return FALSE;
 
-	if ((header = malloc(block_size)) == NULL)
+restart:
+	if ((header = realloc(header, block_size)) == NULL)
 		error(FATAL, "diskdump / compressed kdump: cannot malloc block_size buffer\n");
 
 	if (lseek(dd->dfd, 0, SEEK_SET) == failed) {
@@ -166,21 +167,14 @@
 		goto err;
 
 	if (header->block_size != block_size) {
-		error(INFO, "%s: block size in the dump header does not match"
-	            " with system page size\n",
-			DISKDUMP_VALID() ? "diskdump" : "compressed kdump");
-		goto err;
+		block_size = header->block_size;
+		if (CRASHDEBUG(1)) {
+			fprintf(fp, "Retrying with different block size: %d\n", header->block_size);
+		}
+		goto restart;
 	}
-	dd->block_size  = block_size;
-	dd->block_shift = ffs(block_size) - 1;
-
-	if (sizeof(*header) + sizeof(void *) * header->nr_cpus > block_size ||
-	    header->nr_cpus <= 0) {
-		error(INFO, "%s: invalid nr_cpus value: %d\n", 
-			DISKDUMP_VALID() ? "diskdump" : "compressed kdump",
-			header->nr_cpus);
-		goto err;
-	}
+	dd->block_size  = header->block_size;
+	dd->block_shift = ffs(header->block_size) - 1;
 
 	/* read sub header */
 	offset = (off_t)block_size;
diff --git a/ia64.c b/ia64.c
--- a/ia64.c
+++ b/ia64.c
@@ -82,6 +82,65 @@
 
 struct machine_specific ia64_machine_specific = { 0 };
 
+
+/*
+ * Helper function to set the page size on ia64. This function may be
+ * called multiple times.
+ */
+static void
+ia64_set_pagesize(int pagesize)
+{
+	machdep->pagesize = pagesize;
+	machdep->pageshift = ffs(pagesize) - 1;
+	machdep->pageoffset = pagesize - 1;
+	machdep->pagemask = ~(machdep->pageoffset);
+
+	switch (pagesize)
+	{
+	case 4096:
+		machdep->stacksize = (power(2, 3) * PAGESIZE());
+		break;
+	case 8192:
+		machdep->stacksize = (power(2, 2) * PAGESIZE());
+		break;
+	case 16384:
+		machdep->stacksize = (power(2, 1) * PAGESIZE());
+		break;
+	case 65536:
+		machdep->stacksize = (power(2, 0) * PAGESIZE());
+		break;
+	default:
+		/* 4K, 8K, 16K and 64K are the only valid page sizes on ia64 */
+		error(FATAL, "Try to set invalid page size: %d", pagesize);
+		break;
+	}
+
+	if ((machdep->pgd = (char *)realloc(machdep->pgd, pagesize)) == NULL)
+		error(FATAL, "cannot malloc pgd space.");
+	if ((machdep->pud = (char *)realloc(machdep->pud, pagesize)) == NULL)
+		error(FATAL, "cannot malloc pud space.");
+	if ((machdep->pmd = (char *)realloc(machdep->pmd, pagesize)) == NULL)
+		error(FATAL, "cannot malloc pmd space.");
+	if ((machdep->ptbl = (char *)realloc(machdep->ptbl, pagesize)) == NULL)
+		error(FATAL, "cannot malloc ptbl space.");
+}
+
+
+static void
+ia64_check_adjust_pagesize(void)
+{
+	struct syment *sp, *spn;
+
+	if ((sp = symbol_search("empty_zero_page")) &&
+			(spn = next_symbol(NULL, sp)) && 
+		    	((spn->value - sp->value) != PAGESIZE())) {
+
+		error(INFO, "Adjusting page size based on the zero page (to %d). That may cause trouble.\n",
+			spn->value - sp->value);
+		ia64_set_pagesize(spn->value - sp->value);
+	}
+}
+
 void
 ia64_init(int when)
 {
@@ -108,36 +167,7 @@
 		machdep->machspec = &ia64_machine_specific;
 		if (pc->flags & KERNEL_DEBUG_QUERY)
 			return;
-                machdep->pagesize = memory_page_size();
-                machdep->pageshift = ffs(machdep->pagesize) - 1;
-                machdep->pageoffset = machdep->pagesize - 1;
-                machdep->pagemask = ~(machdep->pageoffset);
-		switch (machdep->pagesize)
-		{
-		case 4096:
-			machdep->stacksize = (power(2, 3) * PAGESIZE());
-			break;
-		case 8192:
-			machdep->stacksize = (power(2, 2) * PAGESIZE());
-			break;
-		case 16384:
-			machdep->stacksize = (power(2, 1) * PAGESIZE());
-			break;
-		case 65536:
-			machdep->stacksize = (power(2, 0) * PAGESIZE());
-			break;
-		default:
-			machdep->stacksize = 32*1024;
-			break;
-		}
-                if ((machdep->pgd = (char *)malloc(PAGESIZE())) == NULL)
-                        error(FATAL, "cannot malloc pgd space.");
-		if ((machdep->pud = (char *)malloc(PAGESIZE())) == NULL)
-			error(FATAL, "cannot malloc pud space.");
-                if ((machdep->pmd = (char *)malloc(PAGESIZE())) == NULL)
-                        error(FATAL, "cannot malloc pmd space.");
-                if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL)
-                        error(FATAL, "cannot malloc ptbl space.");
+                ia64_set_pagesize(memory_page_size());
                 machdep->last_pgd_read = 0;
                 machdep->last_pud_read = 0;
                 machdep->last_pmd_read = 0;
@@ -155,22 +185,13 @@
 
 		if (pc->flags & KERNEL_DEBUG_QUERY)
 			return;
-		
+
 		/*
-		 * Until the kernel core dump and va_server library code
-		 * do the right thing with respect to the configured page size,
-		 * try to recognize a fatal inequity between the compiled-in 
-		 * page size and the page size used by the kernel.
-		 */ 
-		
-
-		if ((sp = symbol_search("empty_zero_page")) &&
-		    (spn = next_symbol(NULL, sp)) && 
-		    ((spn->value - sp->value) != PAGESIZE())) 
-			error(FATAL, 
-	        "compiled-in page size: %d  (apparent) kernel page size: %ld\n",
-				PAGESIZE(), spn->value - sp->value);
-
+		 * Check if the page size of the kernel matches the current
+		 * page size. If not, adjust it.
+		 */
+		ia64_check_adjust_pagesize();
+		
                 machdep->kvbase = KERNEL_VMALLOC_BASE;
 		machdep->identity_map_base = KERNEL_CACHED_BASE;
                 machdep->is_kvaddr = generic_is_kvaddr;
@@ -4196,6 +4217,8 @@
 		return 0;
 }
 
+
+
 static void
 ia64_init_hyper(int when)
 {
@@ -4217,36 +4240,7 @@
 		machdep->machspec = &ia64_machine_specific;
 		if (pc->flags & KERNEL_DEBUG_QUERY)
 			return;
-                machdep->pagesize = memory_page_size();
-                machdep->pageshift = ffs(machdep->pagesize) - 1;
-                machdep->pageoffset = machdep->pagesize - 1;
-                machdep->pagemask = ~(machdep->pageoffset);
-		switch (machdep->pagesize)
-		{
-		case 4096:
-			machdep->stacksize = (power(2, 3) * PAGESIZE());
-			break;
-		case 8192:
-			machdep->stacksize = (power(2, 2) * PAGESIZE());
-			break;
-		case 16384:
-			machdep->stacksize = (power(2, 1) * PAGESIZE());
-			break;
-		case 65536:
-			machdep->stacksize = (power(2, 0) * PAGESIZE());
-			break;
-		default:
-			machdep->stacksize = 32*1024;
-			break;
-		}
-                if ((machdep->pgd = (char *)malloc(PAGESIZE())) == NULL)
-                        error(FATAL, "cannot malloc pgd space.");
-		if ((machdep->pud = (char *)malloc(PAGESIZE())) == NULL)
-			error(FATAL, "cannot malloc pud space.");
-                if ((machdep->pmd = (char *)malloc(PAGESIZE())) == NULL)
-                        error(FATAL, "cannot malloc pmd space.");
-                if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL)
-                        error(FATAL, "cannot malloc ptbl space.");
+                ia64_set_pagesize(memory_page_size());
                 machdep->last_pgd_read = 0;
                 machdep->last_pud_read = 0;
                 machdep->last_pmd_read = 0;
@@ -4263,7 +4257,7 @@
 
 		if (pc->flags & KERNEL_DEBUG_QUERY)
 			return;
-		
+
                 machdep->kvbase = HYPERVISOR_VIRT_START;
 		machdep->identity_map_base = HYPERVISOR_VIRT_START;
                 machdep->is_kvaddr = ia64_is_kvaddr_hyper;
diff --git a/netdump.c b/netdump.c
--- a/netdump.c
+++ b/netdump.c
@@ -1392,6 +1392,93 @@
 }
 
 /*
+ * VMCOREINFO
+ *
+ * This is an ELF note intended for makedumpfile that is exported by the
+ * kernel that crashes and presented as ELF note to the /proc/vmcore
+ * of the panic kernel.
+ */
+
+#define VMCOREINFO_NOTE_NAME        "VMCOREINFO"
+#define VMCOREINFO_NOTE_NAME_BYTES  (sizeof(VMCOREINFO_NOTE_NAME))
+
+/*
+ * Reads a string value from VMCOREINFO.
+ *
+ * Returns a string (that has to be freed by the caller) that contains the
+ * value for key or NULL if the key has not been found.
+ */
+static char *
+vmcoreinfo_read_string(const char *key)
+{
+	int i, j;
+	size_t key_length = strlen(key);
+	char *vmcoreinfo = (char *)nd->vmcoreinfo;
+	char *value = NULL;
+
+	if (!nd->vmcoreinfo) {
+		return NULL;
+	}
+
+	/* the '+ 1' is the equal sign */
+	for (i = 0; i < (nd->size_vmcoreinfo - key_length + 1); i++) {
+		/*
+		 * we must also check if we're at the beginning of VMCOREINFO or
+		 * the separating newline is there ... and of course if we have
+		 * a equal sign after the key
+		 */
+		if (strncmp(vmcoreinfo+i, key, key_length) == 0 &&
+				(i == 0 || vmcoreinfo[i-1] == '\n') &&
+				(vmcoreinfo[i+key_length] == '=') ) {
+
+			int end = -1;
+			size_t value_length;
+
+			/* found  ... search for the next newline */
+			for (j = i + key_length + 1; j < nd->size_vmcoreinfo; j++) {
+				if (vmcoreinfo[j] == '\n') {
+					end = j;
+					break;
+				}
+			}
+
+			/* if we didn't find an end, we assume it's the end of VMCOREINFO */
+			if (end == -1) {
+				/* we point after the end */
+				end = nd->size_vmcoreinfo + 1;
+			}
+
+			value_length = end - (1+ i + key_length);
+			value = malloc(value_length);
+			if (value) {
+				strncpy(value, vmcoreinfo + i + key_length + 1, value_length);
+			}
+			break;
+		}
+	}
+
+	return value;
+}
+
+/*
+ * Reads an integer value from VMCOREINFO.
+ */
+static long
+vmcoreinfo_read_integer(const char *key, long default_value)
+{
+	char *string;
+	long retval = default_value;
+
+	string = vmcoreinfo_read_string(key);
+	if (string) {
+		retval = atol(string);
+		free(string);
+	}
+
+	return retval;
+}
+
+/*
  *  Dump a note section header -- the actual data is defined by netdump
  */
 
@@ -1420,6 +1507,19 @@
         netdump_print("                 n_type: %lx ", note->n_type);
 	switch (note->n_type)
 	{
+        case 0: /* unknown, used for VMCOREINFO */
+
+		if (strncmp(VMCOREINFO_NOTE_NAME, (char *)note+sizeof(Elf64_Nhdr),
+				VMCOREINFO_NOTE_NAME_BYTES) == 0) {
+
+			/* we have VMCOREINFO */
+			nd->vmcoreinfo = (char *)nd->elf64 + offset +
+				(sizeof(Elf64_Nhdr) + ((note->n_namesz + 3) & ~3));
+			nd->size_vmcoreinfo = note->n_descsz;
+			nd->page_size = vmcoreinfo_read_integer("PAGESIZE", 0);
+		}
+		break;
+
 	case NT_PRSTATUS:
 		netdump_print("(NT_PRSTATUS)\n");
 		if (store) { 
diff --git a/netdump.h b/netdump.h
--- a/netdump.h
+++ b/netdump.h
@@ -66,6 +66,8 @@
 	uint num_prstatus_notes;
 	void *nt_prstatus_percpu[NR_CPUS];
 	struct xen_kdump_data *xen_kdump_data;
+	void *vmcoreinfo;
+	uint size_vmcoreinfo;
 };
 
 /*
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux