For a PIE binary, all symbol references are PC-relative, even
references to percpu variables. So, to stay compatible with PIE, add
the %rip suffix in the percpu assembly macros when PIE is enabled.
However, relocation of percpu variable references is still broken for
PIE; it will be fixed in a later patch.

Suggested-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Cc: Thomas Garnier <thgarnie@xxxxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
---
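(Illustration only, not part of the patch: with __percpu_rel in place,
a PER_CPU_VAR() reference roughly expands as below; this_cpu_off is
just an example symbol.)

	/* !CONFIG_X86_PIE: absolute, segment-relative displacement */
	movq	%gs:this_cpu_off, %rax

	/* CONFIG_X86_PIE: RIP-relative displacement, so no absolute
	 * relocation is emitted for the percpu symbol */
	movq	%gs:this_cpu_off(%rip), %rax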
 arch/x86/entry/calling.h             | 17 ++++++++++++----
 arch/x86/include/asm/nospec-branch.h | 10 +++++-----
 arch/x86/include/asm/percpu.h        | 29 +++++++++++++++++++++++++---
 arch/x86/kernel/head_64.S            |  2 +-
 arch/x86/kernel/kvm.c                | 21 ++++++++++++++++----
 arch/x86/lib/cmpxchg16b_emu.S        |  8 ++++----
 arch/x86/xen/xen-asm.S               | 10 +++++-----
 7 files changed, 71 insertions(+), 26 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f6907627172b..11328578741d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -173,7 +173,7 @@ For 32-bit we have the following conventions - kernel is built with
 .endm

 #define THIS_CPU_user_pcid_flush_mask   \
-	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+	PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)

 .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
 	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
@@ -370,8 +370,8 @@ For 32-bit we have the following conventions - kernel is built with
 .endm

 .macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
+	GET_PERCPU_BASE \scratch_reg \save_reg
 	rdgsbase \save_reg
-	GET_PERCPU_BASE \scratch_reg
 	wrgsbase \scratch_reg
 .endm

@@ -407,15 +407,24 @@ For 32-bit we have the following conventions - kernel is built with
 * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
-.macro GET_PERCPU_BASE reg:req
+#ifdef CONFIG_X86_PIE
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
+	LOAD_CPU_AND_NODE_SEG_LIMIT \reg
+	andq	$VDSO_CPUNODE_MASK, \reg
+	leaq	__per_cpu_offset(%rip), \scratch_reg
+	movq	(\scratch_reg, \reg, 8), \reg
+.endm
+#else
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
 	LOAD_CPU_AND_NODE_SEG_LIMIT \reg
 	andq	$VDSO_CPUNODE_MASK, \reg
 	movq	__per_cpu_offset(, \reg, 8), \reg
 .endm
+#endif /* CONFIG_X86_PIE */

 #else

-.macro GET_PERCPU_BASE reg:req
+.macro GET_PERCPU_BASE reg:req scratch_reg:req
 	movq	pcpu_unit_offsets(%rip), \reg
 .endm

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index edb2b0cb8efe..d8fd935e0697 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -59,13 +59,13 @@

 #ifdef CONFIG_CALL_THUNKS_DEBUG
 # define CALL_THUNKS_DEBUG_INC_CALLS				\
-	incq	%gs:__x86_call_count;
+	incq	%gs:(__x86_call_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_RETS				\
-	incq	%gs:__x86_ret_count;
+	incq	%gs:(__x86_ret_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_STUFFS				\
-	incq	%gs:__x86_stuffs_count;
+	incq	%gs:(__x86_stuffs_count)__percpu_rel;
 # define CALL_THUNKS_DEBUG_INC_CTXSW				\
-	incq	%gs:__x86_ctxsw_count;
+	incq	%gs:(__x86_ctxsw_count)__percpu_rel;
 #else
 # define CALL_THUNKS_DEBUG_INC_CALLS
 # define CALL_THUNKS_DEBUG_INC_RETS
@@ -95,7 +95,7 @@
 	CALL_THUNKS_DEBUG_INC_CALLS

 #define INCREMENT_CALL_DEPTH					\
-	sarq	$5, %gs:pcpu_hot + X86_call_depth;		\
+	sarq	$5, %gs:(pcpu_hot + X86_call_depth)__percpu_rel;\
 	CALL_THUNKS_DEBUG_INC_CALLS

 #define ASM_INCREMENT_CALL_DEPTH				\

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 13c0d63ed55e..a627a073c6ea 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,16 +4,26 @@

 #ifdef CONFIG_X86_64
 #define __percpu_seg		gs
+#ifdef CONFIG_X86_PIE
+#define __percpu_rel		(%rip)
+#else
+#define __percpu_rel
+#endif /* CONFIG_X86_PIE */
 #else
 #define __percpu_seg		fs
+#define __percpu_rel
 #endif

 #ifdef __ASSEMBLY__

 #ifdef CONFIG_SMP
-#define PER_CPU_VAR(var)	%__percpu_seg:var
+/* Compatible with Position Independent Code */
+#define PER_CPU_VAR(var)	%__percpu_seg:(var)##__percpu_rel
+/* Rare absolute reference */
+#define PER_CPU_VAR_ABS(var)	%__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU_VAR(var)	var
+#define PER_CPU_VAR(var)	(var)##__percpu_rel
+#define PER_CPU_VAR_ABS(var)	var
 #endif	/* SMP */

 #ifdef CONFIG_X86_64_SMP
@@ -148,10 +158,23 @@ do {									\
 	(typeof(_var))(unsigned long) pfo_val__;			\
 })

+/*
+ * Position Independent Code uses relative addresses only.
+ * The 'P' modifier prevents RIP-relative addressing in GCC,
+ * so use the 'a' modifier instead. However, the 'P' modifier
+ * allows RIP-relative addressing in Clang, and Clang doesn't
+ * support the 'a' modifier.
+ */
+#if defined(CONFIG_X86_PIE) && defined(CONFIG_CC_IS_GCC)
+#define __percpu_stable_arg	__percpu_arg(a[var])
+#else
+#define __percpu_stable_arg	__percpu_arg(P[var])
+#endif
+
 #define percpu_stable_op(size, op, _var)				\
 ({									\
 	__pcpu_type_##size pfo_val__;					\
-	asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")	\
+	asm(__pcpu_op2_##size(op, __percpu_stable_arg, "%[val]")	\
 	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
 	    : [var] "p" (&(_var)));					\
 	(typeof(_var))(unsigned long) pfo_val__;			\
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 61f1873d0ff7..1eed50b7d1ac 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -396,7 +396,7 @@ SYM_CODE_START(start_cpu0)
 	UNWIND_HINT_END_OF_STACK

 	/* Find the idle task stack */
-	movq	PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
+	movq	PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx
 	movq	TASK_threadsp(%rcx), %rsp

 	jmp	.Ljump_to_C_code
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1cceac5984da..32d7b201f4f0 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -794,14 +794,27 @@ PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);

 extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);

+#ifndef CONFIG_X86_PIE
+#define KVM_CHECK_VCPU_PREEMPTED					\
+	"movq __per_cpu_offset(,%rdi,8), %rax;"				\
+	"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+#else
+#define KVM_CHECK_VCPU_PREEMPTED					\
+	"pushq %rdi;"							\
+	"leaq __per_cpu_offset(%rip), %rax;"				\
+	"movq (%rax,%rdi,8), %rax;"					\
+	"leaq steal_time(%rip), %rdi;"					\
+	"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "(%rax, %rdi, 1);" \
+	"popq %rdi;"
+#endif
+
 /*
  * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
  * restoring to/from the stack.
 */
-#define PV_VCPU_PREEMPTED_ASM						\
- "movq __per_cpu_offset(,%rdi,8), %rax\n\t"				\
- "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
- "setne %al\n\t"
+#define PV_VCPU_PREEMPTED_ASM						\
+	KVM_CHECK_VCPU_PREEMPTED					\
+	"setne %al\n\t"

 DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
 		    PV_VCPU_PREEMPTED_ASM, .text);
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 33c70c0160ea..891c5e9fd868 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -27,13 +27,13 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 	pushfq
 	cli

-	cmpq PER_CPU_VAR((%rsi)), %rax
+	cmpq PER_CPU_VAR_ABS((%rsi)), %rax
 	jne .Lnot_same
-	cmpq PER_CPU_VAR(8(%rsi)), %rdx
+	cmpq PER_CPU_VAR_ABS(8(%rsi)), %rdx
 	jne .Lnot_same

-	movq %rbx, PER_CPU_VAR((%rsi))
-	movq %rcx, PER_CPU_VAR(8(%rsi))
+	movq %rbx, PER_CPU_VAR_ABS((%rsi))
+	movq %rcx, PER_CPU_VAR_ABS(8(%rsi))

 	popfq
 	mov $1, %al
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 9e5e68008785..448958ddbaf8 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -28,7 +28,7 @@
 * non-zero.
 */
 SYM_FUNC_START(xen_irq_disable_direct)
-	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+	movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
 	RET
 SYM_FUNC_END(xen_irq_disable_direct)

@@ -69,7 +69,7 @@ SYM_FUNC_END(check_events)
 SYM_FUNC_START(xen_irq_enable_direct)
 	FRAME_BEGIN
 	/* Unmask events */
-	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+	movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)

 	/*
 	 * Preempt here doesn't matter because that will deal with any
@@ -78,7 +78,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
 	 */

 	/* Test for pending */
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
 	jz 1f

 	call check_events
@@ -97,7 +97,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
 * x86 use opposite senses (mask vs enable).
 */
 SYM_FUNC_START(xen_save_fl_direct)
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
 	setz %ah
 	addb %ah, %ah
 	RET
@@ -113,7 +113,7 @@ SYM_FUNC_END(xen_read_cr2);
 SYM_FUNC_START(xen_read_cr2_direct)
 	FRAME_BEGIN
-	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+	_ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
 	FRAME_END
 	RET
 SYM_FUNC_END(xen_read_cr2_direct);
-- 
2.31.1
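
A standalone userspace sketch of the operand-modifier behavior that the
percpu.h comment above describes (demo_var and demo_read are made-up
names, and the kernel's %gs segment prefix is omitted). When built with
-fPIE, GCC's 'P' modifier prints the bare symbol and defeats
RIP-relative addressing, so the 'a' modifier is used instead; Clang's
'P' already emits a RIP-relative form, and Clang lacks the 'a'
modifier:

	static unsigned long demo_var;

	static unsigned long demo_read(void)
	{
		unsigned long val;

	#if defined(__GNUC__) && !defined(__clang__) && defined(__PIE__)
		/* GCC + PIE: 'a' emits "movq demo_var(%rip), %rax" */
		asm("movq %a[var], %[val]"
		    : [val] "=r" (val) : [var] "p" (&demo_var));
	#else
		/* 'P' prints the bare address: "movq demo_var, %rax" */
		asm("movq %P[var], %[val]"
		    : [val] "=r" (val) : [var] "p" (&demo_var));
	#endif
		return val;
	}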