4.14-stable review patch. If anyone has any objections, please let me know. ------------------ From: James Morse <james.morse@xxxxxxx> Commit 6d99b68933fbcf51f84fcbba49246ce1209ec193 upstream. Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1 on VHE hosts, and allows future code to blindly access per-cpu variables without triggering world-switch. Signed-off-by: James Morse <james.morse@xxxxxxx> Reviewed-by: Christoffer Dall <cdall@xxxxxxxxxx> Reviewed-by: Catalin Marinas <catalin.marinas@xxxxxxx> Signed-off-by: Catalin Marinas <catalin.marinas@xxxxxxx> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/arm64/include/asm/alternative.h | 2 ++ arch/arm64/include/asm/assembler.h | 8 ++++++++ arch/arm64/include/asm/percpu.h | 11 +++++++++-- arch/arm64/kernel/alternative.c | 9 +++++---- arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ arch/arm64/mm/proc.S | 8 ++++++++ 6 files changed, 49 insertions(+), 6 deletions(-) --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -12,6 +12,8 @@ #include <linux/stddef.h> #include <linux/stringify.h> +extern int alternatives_applied; + struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -260,7 +260,11 @@ lr .req x30 // link register #else adr_l \dst, \sym #endif +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 +alternative_else + mrs \tmp, tpidr_el2 +alternative_endif add \dst, \dst, \tmp .endm @@ -271,7 +275,11 @@ lr .req x30 // link register */ .macro ldr_this_cpu dst, sym, tmp adr_l \dst, \sym +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 +alternative_else + mrs \tmp, tpidr_el2 +alternative_endif ldr \dst, [\dst, \tmp] .endm --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,11 +16,15 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include <asm/alternative.h> #include <asm/stack_pointer.h> static inline void set_my_cpu_offset(unsigned long off) { - asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); + asm volatile(ALTERNATIVE("msr tpidr_el1, %0", + "msr tpidr_el2, %0", + ARM64_HAS_VIRT_HOST_EXTN) + :: "r" (off) : "memory"); } static inline unsigned long __my_cpu_offset(void) @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_off * We want to allow caching the value, so avoid using volatile and * instead use a fake stack read to hazard against barrier(). */ - asm("mrs %0, tpidr_el1" : "=r" (off) : + asm(ALTERNATIVE("mrs %0, tpidr_el1", + "mrs %0, tpidr_el2", + ARM64_HAS_VIRT_HOST_EXTN) + : "=r" (off) : "Q" (*(const unsigned long *)current_stack_pointer)); return off; --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -32,6 +32,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) +int alternatives_applied; + struct alt_region { struct alt_instr *begin; struct alt_instr *end; @@ -143,7 +145,6 @@ static void __apply_alternatives(void *a */ static int __apply_alternatives_multi_stop(void *unused) { - static int patched = 0; struct alt_region region = { .begin = (struct alt_instr *)__alt_instructions, .end = (struct alt_instr *)__alt_instructions_end, @@ -151,14 +152,14 @@ static int __apply_alternatives_multi_st /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(patched)) + while (!READ_ONCE(alternatives_applied)) cpu_relax(); isb(); } else { - BUG_ON(patched); + BUG_ON(alternatives_applied); __apply_alternatives(®ion, true); /* Barriers provided by the cache flushing */ - WRITE_ONCE(patched, 1); + WRITE_ONCE(alternatives_applied, 1); } return 0; --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -880,6 +880,22 @@ static int __init parse_kpti(char *str) early_param("kpti", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +static int cpu_copy_el2regs(void *__unused) +{ + /* + * Copy register values that aren't redirected by hardware. + * + * Before code patching, we only set tpidr_el1, all CPUs need to copy + * this value to tpidr_el2 before we patch the code. Once we've done + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to + * do anything here. + */ + if (!alternatives_applied) + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); + + return 0; +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -949,6 +965,7 @@ static const struct arm64_cpu_capabiliti .capability = ARM64_HAS_VIRT_HOST_EXTN, .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, + .enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend) mrs x8, mdscr_el1 mrs x9, oslsr_el1 mrs x10, sctlr_el1 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs x11, tpidr_el1 +alternative_else + mrs x11, tpidr_el2 +alternative_endif mrs x12, sp_el0 stp x2, x3, [x0] stp x4, xzr, [x0, #16] @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume) msr mdscr_el1, x10 msr sctlr_el1, x12 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN msr tpidr_el1, x13 +alternative_else + msr tpidr_el2, x13 +alternative_endif msr sp_el0, x14 /* * Restore oslsr_el1 by writing oslar_el1