From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream. Introduce X86_FEATURE_RETHUNK for those afflicted with needing this. [ bp: Do only INT3 padding - simpler. ] Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Signed-off-by: Borislav Petkov <bp@xxxxxxx> Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Signed-off-by: Borislav Petkov <bp@xxxxxxx> [cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL] [cascardo: no IBT support] Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@xxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/include/asm/alternative.h | 1 arch/x86/include/asm/cpufeatures.h | 1 arch/x86/include/asm/disabled-features.h | 3 + arch/x86/kernel/alternative.c | 60 +++++++++++++++++++++++++++++++ arch/x86/kernel/module.c | 8 +++- arch/x86/kernel/vmlinux.lds.S | 7 +++ 6 files changed, 78 insertions(+), 2 deletions(-) --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); struct module; --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -298,6 +298,7 @@ /* FREE! (11*32+11) */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -60,7 +60,8 @@ # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ + (1 << (X86_FEATURE_RETHUNK & 31))) #endif /* Force disable because it's broken beyond repair */ --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(vo } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -506,9 +507,67 @@ void __init_or_module noinline apply_ret } } +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op1; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op1 = insn.opcode.bytes[0]; + if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ @@ -824,6 +883,7 @@ void __init alternative_instructions(voi * those can rewrite the retpoline thunks. */ apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* * Then patch alternatives, such that those paravirt calls that are in --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL; + *retpolines = NULL, *returns = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; } /* @@ -285,6 +287,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -284,6 +284,13 @@ SECTIONS *(.retpoline_sites) __retpoline_sites_end = .; } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } #endif /*