[PATCH v7 1/3] x86: Add classes to exception tables

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Starting with a patch from Andy Lutomirski <luto@xxxxxxxxxxxxxx>
that used linker relocation trickery to free up a couple of bits
in the "fixup" field of the exception table (and generalized the
uaccess_err hack to use one of the classes).

This patch allocates another one of the classes to provide
a mechanism to provide the fault number to the fixup code
in %rax.

Still one free class for the next brilliant idea. If more are
needed it should be possible to squeeze another bit or three
extending the same technique.

Originally-from: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
 arch/x86/include/asm/asm.h     | 102 +++++++++++++++++++++++++++++++----------
 arch/x86/include/asm/uaccess.h |  17 +++++--
 arch/x86/kernel/kprobes/core.c |   2 +-
 arch/x86/kernel/traps.c        |   6 +--
 arch/x86/mm/extable.c          |  66 ++++++++++++++++++--------
 arch/x86/mm/fault.c            |   2 +-
 6 files changed, 142 insertions(+), 53 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679aba703..977273e36f87 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -43,19 +43,79 @@
 #define _ASM_DI		__ASM_REG(di)
 
 /* Exception table entry */
-#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to)					\
-	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
-	.long (from) - . ;					\
-	.long (to) - . ;					\
-	.popsection
 
-# define _ASM_EXTABLE_EX(from,to)				\
-	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
-	.long (from) - . ;					\
-	.long (to) - . + 0x7ffffff0 ;				\
+#define	_EXTABLE_BIAS	0x20000000
+/*
+ * An exception table entry is 64 bits.  The first 32 bits are the offset
+ * from that entry to the potentially faulting instruction.  sortextable
+ * relies on that exact encoding.  The second 32 bits encode the fault
+ * handler address.
+ *
+ * We want to stick two extra bits of handler class into the fault handler
+ * address.  All of these are generated by relocations, so we can only
+ * rely on addition.
+ *
+ * The offset to the fixup is signed, and we're trying to use the high
+ * bits for a different purpose.  In C, we could just do:
+ *
+ * u32 class_and_offset = ((target - here) & 0x3fffffff) | class;
+ *
+ * Then, to decode it, we'd mask off the class and sign-extend to recover
+ * the offset.
+ *
+ * In asm, we can't do that, because this all gets laundered through the
+ * linker, and there's no relocation type that supports this chicanery.
+ * Instead we cheat a bit.  We first add a large number to the offset
+ * (0x20000000).  The result is still nominally signed, but now it's
+ * always positive, and the two high bits are always clear.  We can then
+ * set high bits by ordinary addition or subtraction instead of using
+ * bitwise operations.  As far as the linker is concerned, all we're
+ * doing is adding a large constant to the difference between here (".")
+ * and the target, and that's a valid relocation type.
+ *
+ * We therefore emit:
+ * (target - here) + (class) + 0x20000000
+ *
+ * This has the property that the two high bits are the class (see
+ * ex_class().
+ *
+ * To get the offset back we just mask off the class bits and subtract
+ * 0x20000000. See ex_fixup_addr().
+ */
+
+/*
+ * There are two bits of extable entry class giving four classes
+ */
+#define EXTABLE_CLASS_DEFAULT	0	/* standard uaccess fixup */
+#define EXTABLE_CLASS_FAULT	1	/* provide fault number as well as fixup */
+#define EXTABLE_CLASS_EX	2	/* uaccess + set uaccess_err */
+#define EXTABLE_CLASS_UNUSED	3	/* available for something else */
+
+/*
+ * The biases are the class constants + _EXTABLE_BIAS, as signed integers.
+ * This can't use ordinary arithmetic -- the assembler isn't that smart.
+ */
+#define _EXTABLE_BIAS_DEFAULT	_EXTABLE_BIAS
+#define _EXTABLE_BIAS_FAULT	_EXTABLE_BIAS + 0x40000000
+#define _EXTABLE_BIAS_EX	_EXTABLE_BIAS - 0x80000000
+#define _EXTABLE_BIAS_UNUSED	_EXTABLE_BIAS - 0x40000000
+
+#define _ASM_EXTABLE(from,to)						\
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_DEFAULT)
+
+#define _ASM_EXTABLE_FAULT(from,to)					\
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_FAULT)
+
+#define _ASM_EXTABLE_EX(from,to)					\
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_EX)
+
+#ifdef __ASSEMBLY__
+# define _EXPAND_EXTABLE_BIAS(x) x
+# define _ASM_EXTABLE_CLASS(from,to,bias)				\
+	.pushsection "__ex_table","a" ;					\
+	.balign 8 ;							\
+	.long (from) - . ;						\
+	.long (to) - . + _EXPAND_EXTABLE_BIAS(bias) ;			\
 	.popsection
 
 # define _ASM_NOKPROBE(entry)					\
@@ -89,18 +149,12 @@
 	.endm
 
 #else
-# define _ASM_EXTABLE(from,to)					\
-	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
-	" .long (" #from ") - .\n"				\
-	" .long (" #to ") - .\n"				\
-	" .popsection\n"
-
-# define _ASM_EXTABLE_EX(from,to)				\
-	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
-	" .long (" #from ") - .\n"				\
-	" .long (" #to ") - . + 0x7ffffff0\n"			\
+# define _EXPAND_EXTABLE_BIAS(x) #x
+# define _ASM_EXTABLE_CLASS(from,to,bias)				\
+	" .pushsection \"__ex_table\",\"a\"\n"				\
+	" .balign 8\n"							\
+	" .long (" #from ") - .\n"					\
+	" .long (" #to ") - . + " _EXPAND_EXTABLE_BIAS(bias) "\n"	\
 	" .popsection\n"
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a8df874f3e88..b023300cd6f0 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -93,9 +93,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
  * The exception table consists of pairs of addresses relative to the
  * exception table enty itself: the first is the address of an
  * instruction that is allowed to fault, and the second is the address
- * at which the program should continue.  No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * at which the program should continue.  The exception table is linked
+ * soon after the fixup section, so we don't need a full 32-bit offset
+ * for the fixup. We steal the two upper bits so we can tag exception
+ * table entries with different classes. In the default class no registers
+ * are modified, so it is entirely up to the continuation code to figure
+ * out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -110,7 +113,13 @@ struct exception_table_entry {
 #define ARCH_HAS_SORT_EXTABLE
 #define ARCH_HAS_SEARCH_EXTABLE
 
-extern int fixup_exception(struct pt_regs *regs);
+static inline unsigned int
+ex_class(const struct exception_table_entry *x)
+{
+	return (u32)x->fixup >> 30;
+}
+
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern int early_fixup_exception(unsigned long *ip);
 
 /*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6cc873..0f05deeff5ce 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, trapnr))
 			return 1;
 
 		/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 346eec73f7db..df25081e5970 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -199,7 +199,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -453,7 +453,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	tsk = current;
 	if (!user_mode(regs)) {
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, X86_TRAP_GP))
 			return;
 
 		tsk->thread.error_code = error_code;
@@ -699,7 +699,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	conditional_sti(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, X86_TRAP_DE)) {
 			task->thread.error_code = error_code;
 			task->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e9c326..7a592ec193d5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -11,13 +11,51 @@ ex_insn_addr(const struct exception_table_entry *x)
 static inline unsigned long
 ex_fixup_addr(const struct exception_table_entry *x)
 {
-	return (unsigned long)&x->fixup + x->fixup;
+	long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)_EXTABLE_BIAS;
+	return (unsigned long)&x->fixup + offset;
 }
 
-int fixup_exception(struct pt_regs *regs)
+/* Fixup functions for each exception class */
+static int fix_class_default(const struct exception_table_entry *fixup,
+		      struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	return 1;
+}
+static int fix_class_fault(const struct exception_table_entry *fixup,
+		      struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = trapnr;
+	return 1;
+}
+static int fix_class_ex(const struct exception_table_entry *fixup,
+		      struct pt_regs *regs, int trapnr)
+{
+	/* Special hack for uaccess_err */
+	current_thread_info()->uaccess_err = 1;
+	regs->ip = ex_fixup_addr(fixup);
+	return 1;
+}
+static int fix_class_unused(const struct exception_table_entry *fixup,
+		      struct pt_regs *regs, int trapnr)
+{
+	/* can't happen unless exception table was corrupted */
+	BUG_ON(1);
+	return 0;
+}
+
+static int (*allclasses[])(const struct exception_table_entry *,
+			   struct pt_regs *, int) = {
+	[EXTABLE_CLASS_DEFAULT] = fix_class_default,
+	[EXTABLE_CLASS_FAULT] = fix_class_fault,
+	[EXTABLE_CLASS_EX] = fix_class_ex,
+	[EXTABLE_CLASS_UNUSED] = fix_class_unused
+};
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
 {
 	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
 
 #ifdef CONFIG_PNPBIOS
 	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -34,17 +72,8 @@ int fixup_exception(struct pt_regs *regs)
 #endif
 
 	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
-
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* Special hack for uaccess_err */
-			current_thread_info()->uaccess_err = 1;
-			new_ip -= 0x7ffffff0;
-		}
-		regs->ip = new_ip;
-		return 1;
-	}
+	if (fixup)
+		return allclasses[ex_class(fixup)](fixup, regs, trapnr);
 
 	return 0;
 }
@@ -53,18 +82,15 @@ int fixup_exception(struct pt_regs *regs)
 int __init early_fixup_exception(unsigned long *ip)
 {
 	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
 
 	fixup = search_exception_tables(*ip);
 	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
-
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* uaccess handling not supported during early boot */
+		if (ex_class(fixup)) {
+			/* special handling not supported during early boot */
 			return 0;
 		}
 
-		*ip = new_ip;
+		*ip = ex_fixup_addr(fixup);
 		return 1;
 	}
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index eef44d9a3f77..495946c3f9dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -656,7 +656,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	int sig;
 
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs)) {
+	if (fixup_exception(regs, X86_TRAP_PF)) {
 		/*
 		 * Any interrupt that takes a fault gets the fixup. This makes
 		 * the below recursive fault logic only apply to a faults from
-- 
2.1.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]