[patch] MCA recovery: kernel context recovery table

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[patch] MCA recovery: kernel context recovery table

Memory errors encountered by user applications may surface
when the CPU is running in kernel context.  The current code
will not attempt recovery if the MCA surfaces in kernel
context (privilege level 0).  This patch adds a check for cases
where the user initiated the load that surfaces in kernel 
interrupt code.

An example is a user process launching a load from memory
where the data in memory has bad ECC.  Before the bad data
gets to the CPU register, an interrupt comes in.  The
code jumps to the IVT interrupt entry point and begins 
execution in kernel context.  The process of saving the
user registers (SAVE_REST) causes the bad data to be loaded
into a CPU register, triggering the MCA.  The MCA surfaces in
kernel context, even though the load was initiated from
user context.

As suggested by David and Tony, this patch uses an
exception-table-like approach, putting the tagged recovery
addresses in a searchable table.  One difference from the
exception table is that MCAs do not surface in precise places
(such as with a TLB miss), so instead of tagging specific
instructions, address ranges are registered.  A single macro is
used to do the tagging: its input parameter is the label of the
starting address, and the location of the macro itself marks the
ending address.  This limits clutter in the code.

This patch only tags one spot, the interrupt IVT entry.
Testing showed that spot to be a "heavy hitter" with 
MCAs surfacing while saving user registers.  Other spots
can be added as needed by adding a single macro.

Signed-off-by: Russ Anderson (rja@xxxxxxx)

--------------------------------------------------------------
 arch/ia64/kernel/ivt.S         |    1 
 arch/ia64/kernel/mca.c         |   98 ++++++++++++++++++++++++++++++-----------
 arch/ia64/kernel/mca_drv.c     |   20 +++++---
 arch/ia64/kernel/mca_drv.h     |    7 ++
 arch/ia64/kernel/mca_drv_asm.S |    6 +-
 arch/ia64/kernel/vmlinux.lds.S |    9 +++
 include/asm-ia64/asmmacro.h    |   11 ++++
 7 files changed, 119 insertions(+), 33 deletions(-)

Index: test/arch/ia64/kernel/mca_drv.c
===================================================================
--- test.orig/arch/ia64/kernel/mca_drv.c	2006-02-10 13:09:51.982352916 -0600
+++ test/arch/ia64/kernel/mca_drv.c	2006-02-10 17:59:34.278072781 -0600
@@ -6,6 +6,7 @@
  * Copyright (C) Hidetoshi Seto (seto.hidetoshi@xxxxxxxxxxxxxx)
  * Copyright (C) 2005 Silicon Graphics, Inc
  * Copyright (C) 2005 Keith Owens <kaos@xxxxxxx>
+ * Copyright (C) 2006 Russ Anderson <rja@xxxxxxx>
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -121,10 +122,12 @@ mca_page_isolate(unsigned long paddr)
  */
 
 void
-mca_handler_bh(unsigned long paddr)
+mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
 {
-	printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n",
-		current->pid, current->comm);
+	printk(KERN_DEBUG "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
+		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
+		raw_smp_processor_id(), current->pid, current->uid,
+		iip, ipsr, paddr, current->comm);
 
 	spin_lock(&mca_bh_lock);
 	switch (mca_page_isolate(paddr)) {
@@ -441,21 +444,25 @@ recover_from_read_error(slidx_table_t *s
 	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
 		return 0;
 	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
 
 	/*
 	 *  Check the privilege level of interrupted context.
 	 *   If it is user-mode, then terminate affected process.
 	 */
-	if (psr1->cpl != 0) {
+
+	pmsa = sos->pal_min_state;
+	if (psr1->cpl != 0 || ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
 		smei = peidx_bus_check(peidx, 0);
 		if (smei->valid.target_identifier) {
 			/*
 			 *  setup for resume to bottom half of MCA,
 			 * "mca_handler_bhhook"
 			 */
-			pmsa = sos->pal_min_state;
-			/* pass to bhhook as 1st argument (gr8) */
+			/* pass to bhhook as argument (gr8, ...) */
 			pmsa->pmsa_gr[8-1] = smei->target_identifier;
+			pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+			pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
 			/* set interrupted return address (but no use) */
 			pmsa->pmsa_br0 = pmsa->pmsa_iip;
 			/* change resume address to bottom half */
@@ -465,6 +472,7 @@ recover_from_read_error(slidx_table_t *s
 			psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
 			psr2->cpl = 0;
 			psr2->ri  = 0;
+			psr2->bn  = 1;
 			psr2->i  = 0;
 
 			return 1;
Index: test/arch/ia64/kernel/mca_drv.h
===================================================================
--- test.orig/arch/ia64/kernel/mca_drv.h	2006-02-10 13:09:51.982352916 -0600
+++ test/arch/ia64/kernel/mca_drv.h	2006-02-10 17:59:34.285884520 -0600
@@ -111,3 +111,10 @@ typedef struct slidx_table {
 	slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
 	__count; })
 
+struct mca_table_entry {
+	int start_addr;	/* location-relative starting address of MCA recoverable range */
+	int end_addr;	/* location-relative ending address of MCA recoverable range */
+};
+
+extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
+extern int mca_recover_range(unsigned long);
Index: test/arch/ia64/kernel/mca_drv_asm.S
===================================================================
--- test.orig/arch/ia64/kernel/mca_drv_asm.S	2006-02-10 13:09:51.982352916 -0600
+++ test/arch/ia64/kernel/mca_drv_asm.S	2006-02-10 17:59:34.288813922 -0600
@@ -19,7 +19,7 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	;;
 	clrrrb
 	;;						
-	alloc	r16=ar.pfs,0,2,1,0	// make a new frame
+	alloc	r16=ar.pfs,0,2,3,0	// make a new frame
 	;;
 	mov	ar.rsc=0
 	;;
@@ -40,11 +40,13 @@ GLOBAL_ENTRY(mca_handler_bhhook)
 	movl	loc1=mca_handler_bh	// recovery C function
 	;;
 	mov	out0=r8			// poisoned address
+	mov	out1=r9			// iip
+	mov	out2=r10		// psr
 	mov	b6=loc1
 	;;
 	mov	loc1=rp
 	;;
-	ssm	psr.i
+	ssm	psr.i | psr.ic
 	;;
 	br.call.sptk.many rp=b6		// does not return ...
 	;;
Index: test/arch/ia64/kernel/ivt.S
===================================================================
--- test.orig/arch/ia64/kernel/ivt.S	2006-02-10 13:09:51.978447050 -0600
+++ test/arch/ia64/kernel/ivt.S	2006-02-10 17:59:34.296625660 -0600
@@ -862,6 +862,7 @@ ENTRY(interrupt)
 	;;
 	SAVE_REST
 	;;
+	MCA_RECOVER_RANGE(interrupt)
 	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
 	mov out0=cr.ivr		// pass cr.ivr as first arg
 	add out1=16,sp		// pass pointer to pt_regs as second arg
Index: test/arch/ia64/kernel/vmlinux.lds.S
===================================================================
--- test.orig/arch/ia64/kernel/vmlinux.lds.S	2006-02-10 13:09:51.999929316 -0600
+++ test/arch/ia64/kernel/vmlinux.lds.S	2006-02-10 17:59:34.297602128 -0600
@@ -70,6 +70,15 @@ SECTIONS
 	  __stop___ex_table = .;
 	}
 
+  /* MCA table */
+  . = ALIGN(16);
+  __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
+	{
+	  __start___mca_table = .;
+	  *(__mca_table)
+	  __stop___mca_table = .;
+	}
+
   .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
 	{
 	  __start___vtop_patchlist = .;
Index: test/include/asm-ia64/asmmacro.h
===================================================================
--- test.orig/include/asm-ia64/asmmacro.h	2006-02-10 13:09:56.878356655 -0600
+++ test/include/asm-ia64/asmmacro.h	2006-02-24 13:56:35.528818597 -0600
@@ -51,6 +51,17 @@ name:
   [99:]	x
 
 /*
+ * Tag MCA recoverable instruction ranges.
+ */
+
+	.section "__mca_table", "a"		// declare section & section attributes
+	.previous
+
+# define MCA_RECOVER_RANGE(y)			\
+	.xdata4 "__mca_table", y-., 99f-.;	\
+  [99:]
+
+/*
  * Mark instructions that need a load of a virtual address patched to be
  * a load of a physical address.  We use this either in critical performance
  * path (ivt.S - TLB miss processing) or in places where it might not be
Index: test/arch/ia64/kernel/mca.c
===================================================================
--- test.orig/arch/ia64/kernel/mca.c	2006-02-10 13:09:51.980399983 -0600
+++ test/arch/ia64/kernel/mca.c	2006-02-23 16:52:49.392092270 -0600
@@ -83,6 +83,7 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
+#include "mca_drv.h"
 #include "entry.h"
 
 #if defined(IA64_MCA_DEBUG_INFO)
@@ -282,6 +283,50 @@ ia64_mca_log_sal_error_record(int sal_in
 }
 
 /*
+ * search_mca_table
+ *  See if the MCA surfaced in an instruction range
+ *  that has been tagged as recoverable.
+ *
+ *  Inputs
+ *	first	First address range to check
+ *	last	Last address range to check
+ *	ip	Instruction pointer, address we are looking for
+ *
+ * Return value:
+ *      1 on Success (in the table)/ 0 on Failure (not in the  table)
+ */
+int
+search_mca_table (const struct mca_table_entry *first,
+                const struct mca_table_entry *last,
+                unsigned long ip)
+{
+        const struct mca_table_entry *curr;
+        u64 curr_start, curr_end;
+
+        curr = first;
+        while (curr <= last) {
+                curr_start = (u64) &curr->start_addr + curr->start_addr;
+                curr_end = (u64) &curr->end_addr + curr->end_addr;
+
+                if ((ip >= curr_start) && (ip <= curr_end)) {
+                        return 1;
+                }
+                curr++;
+        }
+        return 0;
+}
+
+/* Given an address, look for it in the mca tables. */
+int mca_recover_range(unsigned long addr)
+{
+	extern struct mca_table_entry __start___mca_table[];
+	extern struct mca_table_entry __stop___mca_table[];
+
+	return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
+}
+EXPORT_SYMBOL(mca_recover_range);
+
+/*
  * platform dependent error handling
  */
 #ifndef PLATFORM_MCA_HANDLERS
@@ -723,31 +768,34 @@ ia64_mca_modify_original_stack(struct pt
 		msg = "occurred in user space";
 		goto no_mod;
 	}
-	if (r13 != sos->prev_IA64_KR_CURRENT) {
-		msg = "inconsistent previous current and r13";
-		goto no_mod;
-	}
-	if ((r12 - r13) >= KERNEL_STACK_SIZE) {
-		msg = "inconsistent r12 and r13";
-		goto no_mod;
-	}
-	if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
-		msg = "inconsistent ar.bspstore and r13";
-		goto no_mod;
-	}
-	va.p = old_bspstore;
-	if (va.f.reg < 5) {
-		msg = "old_bspstore is in the wrong region";
-		goto no_mod;
-	}
-	if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
-		msg = "inconsistent ar.bsp and r13";
-		goto no_mod;
-	}
-	size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
-	if (ar_bspstore + size > r12) {
-		msg = "no room for blocked state";
-		goto no_mod;
+
+	if (!mca_recover_range(ms->pmsa_iip)) {
+		if (r13 != sos->prev_IA64_KR_CURRENT) {
+			msg = "inconsistent previous current and r13";
+			goto no_mod;
+		}
+		if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent r12 and r13";
+			goto no_mod;
+		}
+		if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bspstore and r13";
+			goto no_mod;
+		}
+		va.p = old_bspstore;
+		if (va.f.reg < 5) {
+			msg = "old_bspstore is in the wrong region";
+			goto no_mod;
+		}
+		if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bsp and r13";
+			goto no_mod;
+		}
+		size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+		if (ar_bspstore + size > r12) {
+			msg = "no room for blocked state";
+			goto no_mod;
+		}
 	}
 
 	/* Change the comm field on the MCA/INT task to include the pid that
-- 
Russ Anderson, OS RAS/Partitioning Project Lead  
SGI - Silicon Graphics Inc          rja@xxxxxxx
-
: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [Sparc Linux]     [DCCP]     [Linux ARM]     [Yosemite News]     [Linux SCSI]     [Linux x86_64]     [Linux for Ham Radio]

  Powered by Linux