This starts from a patch by Andy Lutomirski <luto@xxxxxxxxxxxxxx> that
used linker relocation trickery to free up a couple of bits in the
"fixup" field of the exception table and generalized the uaccess_err
hack to use one of the resulting classes.

This patch allocates another class to pass the fault number to the
fixup code in %rax.

That still leaves one free class for the next brilliant idea. If more
are needed, it should be possible to squeeze out another bit or three
by extending the same technique.

Originally-from: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
 arch/x86/include/asm/asm.h     | 102 +++++++++++++++++++++++++++++++----------
 arch/x86/include/asm/uaccess.h |  17 +++++--
 arch/x86/kernel/kprobes/core.c |   2 +-
 arch/x86/kernel/traps.c        |   6 +--
 arch/x86/mm/extable.c          |  66 ++++++++++++++++++--------
 arch/x86/mm/fault.c            |   2 +-
 6 files changed, 142 insertions(+), 53 deletions(-)
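A quick aside on the arithmetic (illustrative only, not part of the patch;
EXTABLE_BIAS and the helpers below are local to this sketch): the biased
encoding described in the asm.h comment round-trips like this, mirroring
what ex_class() and ex_fixup_addr() compute.

#include <assert.h>
#include <stdint.h>

#define EXTABLE_BIAS	0x20000000u	/* same value as _EXTABLE_BIAS */

/* what the relocation ends up emitting: (target - here) + bias + class in the top bits */
static uint32_t encode_fixup(int32_t offset, uint32_t cls)
{
	/* offset + bias is always positive and leaves the top two bits clear */
	return (uint32_t)offset + EXTABLE_BIAS + (cls << 30);
}

/* mirrors ex_class(): the class lives in the top two bits */
static uint32_t decode_class(uint32_t fixup)
{
	return fixup >> 30;
}

/* mirrors ex_fixup_addr(): mask off the class, then undo the bias */
static int32_t decode_offset(uint32_t fixup)
{
	return (int32_t)(fixup & 0x3fffffff) - (int32_t)EXTABLE_BIAS;
}

int main(void)
{
	uint32_t f = encode_fixup(-1234, 1);	/* class 1 == EXTABLE_CLASS_FAULT */

	assert(decode_class(f) == 1);
	assert(decode_offset(f) == -1234);
	return 0;
}

This is also why the per-class bias constants are written as _EXTABLE_BIAS
plus or minus 0x40000000/0x80000000 rather than with a bitwise OR: the only
operation the relocation machinery gives us is addition.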
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679aba703..977273e36f87 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -43,19 +43,79 @@
 #define _ASM_DI	__ASM_REG(di)
 
 /* Exception table entry */
-#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to) \
-	.pushsection "__ex_table","a" ; \
-	.balign 8 ; \
-	.long (from) - . ; \
-	.long (to) - . ; \
-	.popsection
-
-# define _ASM_EXTABLE_EX(from,to) \
-	.pushsection "__ex_table","a" ; \
-	.balign 8 ; \
-	.long (from) - . ; \
-	.long (to) - . + 0x7ffffff0 ; \
+#define _EXTABLE_BIAS 0x20000000
+/*
+ * An exception table entry is 64 bits. The first 32 bits are the offset
+ * from that entry to the potentially faulting instruction. sortextable
+ * relies on that exact encoding. The second 32 bits encode the fault
+ * handler address.
+ *
+ * We want to stick two extra bits of handler class into the fault handler
+ * address. All of these are generated by relocations, so we can only
+ * rely on addition.
+ *
+ * The offset to the fixup is signed, and we're trying to use the high
+ * bits for a different purpose. In C, we could just do:
+ *
+ * u32 class_and_offset = ((target - here) & 0x3fffffff) | class;
+ *
+ * Then, to decode it, we'd mask off the class and sign-extend to recover
+ * the offset.
+ *
+ * In asm, we can't do that, because this all gets laundered through the
+ * linker, and there's no relocation type that supports this chicanery.
+ * Instead we cheat a bit. We first add a large number to the offset
+ * (0x20000000). The result is still nominally signed, but now it's
+ * always positive, and the two high bits are always clear. We can then
+ * set high bits by ordinary addition or subtraction instead of using
+ * bitwise operations. As far as the linker is concerned, all we're
+ * doing is adding a large constant to the difference between here (".")
+ * and the target, and that's a valid relocation type.
+ *
+ * We therefore emit:
+ * (target - here) + (class) + 0x20000000
+ *
+ * This has the property that the two high bits are the class (see
+ * ex_class()).
+ *
+ * To get the offset back we just mask off the class bits and subtract
+ * 0x20000000. See ex_fixup_addr().
+ */
+
+/*
+ * There are two bits of extable entry class giving four classes
+ */
+#define EXTABLE_CLASS_DEFAULT	0 /* standard uaccess fixup */
+#define EXTABLE_CLASS_FAULT	1 /* provide fault number as well as fixup */
+#define EXTABLE_CLASS_EX	2 /* uaccess + set uaccess_err */
+#define EXTABLE_CLASS_UNUSED	3 /* available for something else */
+
+/*
+ * The biases are the class constants + _EXTABLE_BIAS, as signed integers.
+ * This can't use ordinary arithmetic -- the assembler isn't that smart.
+ */
+#define _EXTABLE_BIAS_DEFAULT	_EXTABLE_BIAS
+#define _EXTABLE_BIAS_FAULT	_EXTABLE_BIAS + 0x40000000
+#define _EXTABLE_BIAS_EX	_EXTABLE_BIAS - 0x80000000
+#define _EXTABLE_BIAS_UNUSED	_EXTABLE_BIAS - 0x40000000
+
+#define _ASM_EXTABLE(from,to) \
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_DEFAULT)
+
+#define _ASM_EXTABLE_FAULT(from,to) \
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_FAULT)
+
+#define _ASM_EXTABLE_EX(from,to) \
+	_ASM_EXTABLE_CLASS(from, to, _EXTABLE_BIAS_EX)
+
+#ifdef __ASSEMBLY__
+# define _EXPAND_EXTABLE_BIAS(x) x
+# define _ASM_EXTABLE_CLASS(from,to,bias) \
+	.pushsection "__ex_table","a" ; \
+	.balign 8 ; \
+	.long (from) - . ; \
+	.long (to) - . + _EXPAND_EXTABLE_BIAS(bias) ; \
 	.popsection
 
 # define _ASM_NOKPROBE(entry) \
@@ -89,18 +149,12 @@
 	.endm
 #else
-# define _ASM_EXTABLE(from,to) \
-	" .pushsection \"__ex_table\",\"a\"\n" \
-	" .balign 8\n" \
-	" .long (" #from ") - .\n" \
-	" .long (" #to ") - .\n" \
-	" .popsection\n"
-
-# define _ASM_EXTABLE_EX(from,to) \
-	" .pushsection \"__ex_table\",\"a\"\n" \
-	" .balign 8\n" \
-	" .long (" #from ") - .\n" \
-	" .long (" #to ") - . + 0x7ffffff0\n" \
+# define _EXPAND_EXTABLE_BIAS(x) #x
+# define _ASM_EXTABLE_CLASS(from,to,bias) \
+	" .pushsection \"__ex_table\",\"a\"\n" \
+	" .balign 8\n" \
+	" .long (" #from ") - .\n" \
+	" .long (" #to ") - . + " _EXPAND_EXTABLE_BIAS(bias) "\n" \
 	" .popsection\n"
 
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a8df874f3e88..b023300cd6f0 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -93,9 +93,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
  * The exception table consists of pairs of addresses relative to the
  * exception table enty itself: the first is the address of an
  * instruction that is allowed to fault, and the second is the address
- * at which the program should continue. No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * at which the program should continue. The exception table is linked
+ * soon after the fixup section, so we don't need a full 32-bit offset
+ * for the fixup. We steal the two upper bits so we can tag exception
+ * table entries with different classes. In the default class no registers
+ * are modified, so it is entirely up to the continuation code to figure
+ * out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path. This means when everything is well,
@@ -110,7 +113,13 @@ struct exception_table_entry {
 #define ARCH_HAS_SORT_EXTABLE
 #define ARCH_HAS_SEARCH_EXTABLE
 
-extern int fixup_exception(struct pt_regs *regs);
+static inline unsigned int
+ex_class(const struct exception_table_entry *x)
+{
+	return (u32)x->fixup >> 30;
+}
+
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern int early_fixup_exception(unsigned long *ip);
 
 /*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6cc873..0f05deeff5ce 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -988,7 +988,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, trapnr))
 			return 1;
 
 		/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 346eec73f7db..df25081e5970 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -199,7 +199,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -453,7 +453,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	tsk = current;
 	if (!user_mode(regs)) {
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, X86_TRAP_GP))
 			return;
 
 		tsk->thread.error_code = error_code;
@@ -699,7 +699,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	conditional_sti(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, X86_TRAP_DE)) {
 			task->thread.error_code = error_code;
 			task->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e9c326..7a592ec193d5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -11,13 +11,51 @@ ex_insn_addr(const struct exception_table_entry *x)
 static inline unsigned long
 ex_fixup_addr(const struct exception_table_entry *x)
 {
-	return (unsigned long)&x->fixup + x->fixup;
+	long offset = (long)((u32)x->fixup & 0x3fffffff) - (long)_EXTABLE_BIAS;
+	return (unsigned long)&x->fixup + offset;
 }
 
-int fixup_exception(struct pt_regs *regs)
+/* Fixup functions for each exception class */
+static int fix_class_default(const struct exception_table_entry *fixup,
+			     struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	return 1;
+}
+static int fix_class_fault(const struct exception_table_entry *fixup,
+			   struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = trapnr;
+	return 1;
+}
+static int fix_class_ex(const struct exception_table_entry *fixup,
+			struct pt_regs *regs, int trapnr)
+{
+	/* Special hack for uaccess_err */
+	current_thread_info()->uaccess_err = 1;
+	regs->ip = ex_fixup_addr(fixup);
+	return 1;
+}
+static int fix_class_unused(const struct exception_table_entry *fixup,
+			    struct pt_regs *regs, int trapnr)
+{
+	/* can't happen unless exception table was corrupted */
+	BUG_ON(1);
+	return 0;
+}
+
+static int (*allclasses[])(const struct exception_table_entry *,
+			   struct pt_regs *, int) = {
+	[EXTABLE_CLASS_DEFAULT] = fix_class_default,
+	[EXTABLE_CLASS_FAULT] = fix_class_fault,
+	[EXTABLE_CLASS_EX] = fix_class_ex,
+	[EXTABLE_CLASS_UNUSED] = fix_class_unused
+};
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
 {
 	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
 
 #ifdef CONFIG_PNPBIOS
 	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -34,17 +72,8 @@ int fixup_exception(struct pt_regs *regs)
 #endif
 
 	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
-
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* Special hack for uaccess_err */
-			current_thread_info()->uaccess_err = 1;
-			new_ip -= 0x7ffffff0;
-		}
-		regs->ip = new_ip;
-		return 1;
-	}
+	if (fixup)
+		return allclasses[ex_class(fixup)](fixup, regs, trapnr);
 
 	return 0;
 }
@@ -53,18 +82,15 @@ int fixup_exception(struct pt_regs *regs)
 int __init early_fixup_exception(unsigned long *ip)
 {
 	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
 
 	fixup = search_exception_tables(*ip);
 	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
-
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* uaccess handling not supported during early boot */
+		if (ex_class(fixup)) {
+			/* special handling not supported during early boot */
 			return 0;
 		}
 
-		*ip = new_ip;
+		*ip = ex_fixup_addr(fixup);
 		return 1;
 	}
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index eef44d9a3f77..495946c3f9dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -656,7 +656,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	int sig;
 
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs)) {
+	if (fixup_exception(regs, X86_TRAP_PF)) {
 		/*
 		 * Any interrupt that takes a fault gets the fixup. This makes
 		 * the below recursive fault logic only apply to a faults from
-- 
2.1.4
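For illustration, here is a hypothetical consumer of the new FAULT class
(not part of this patch; the macro name and its interface are made up, and
dst/src are assumed to be unsigned long and unsigned long * respectively).
If the load at 1: faults, fix_class_fault() resumes execution at 2: with
the trap number in %rax, which is exactly what the "=&a" output picks up;
on the no-fault path the asm stores -1 there instead.

/*
 * Hypothetical user of _ASM_EXTABLE_FAULT(): load a word and report
 * which trap (if any) was taken.  "trap" ends up -1 on the happy path,
 * or e.g. X86_TRAP_PF/X86_TRAP_GP if the access faulted; "dst" is only
 * meaningful when no trap is reported.
 */
#define read_word_report_trap(dst, src, trap)			\
	asm volatile("	mov $-1, %0\n"				\
		     "1:	mov %2, %1\n"			\
		     "2:\n"					\
		     _ASM_EXTABLE_FAULT(1b, 2b)			\
		     : "=&a" (trap), "=&r" (dst)		\
		     : "m" (*(src)))

The point is that the C caller can then tell, say, a #PF apart from a #GP
without needing a separate out-of-line fixup stub for each case.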