Compared with fetching the per-CPU offset from memory (or cache) on
every access, keeping it in the global pointer (gp) saves one memory
access per per-CPU operation.

Because gp no longer holds __global_pointer$, gp-relative linker
relaxation is no longer sound, so it must be disabled explicitly when
building the kernel:

  export KCFLAGS="... -mno-relax"
  export KAFLAGS="... -mno-relax"
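As a rough sketch of the effect (illustration only, not part of this
patch; the variable and function names below are made up), a per-CPU
read now resolves its offset with a plain register move:

  #include <linux/percpu.h>

  /* Hypothetical per-CPU variable, for illustration only. */
  static DEFINE_PER_CPU(unsigned long, example_hits);

  static unsigned long example_read_hits(void)
  {
          /*
           * With __my_cpu_offset backed by "mv %0, gp", this expands
           * to roughly (ignoring the preemption guard):
           *
           *      mv      t0, gp          # offset: no memory access
           *      la      t1, example_hits
           *      add     t1, t1, t0
           *      ld      a0, 0(t1)
           *
           * whereas the generic asm-generic/percpu.h fallback first
           * loads __per_cpu_offset[cpu] from memory.
           */
          return this_cpu_read(example_hits);
  }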
Signed-off-by: Yunhui Cui <cuiyunhui@xxxxxxxxxxxxx>
---
 arch/riscv/include/asm/asm.h      | 18 ++++++------------
 arch/riscv/include/asm/percpu.h   | 24 ++++++++++++++++++++++++
 arch/riscv/kernel/asm-offsets.c   |  1 +
 arch/riscv/kernel/entry.S         |  4 ++--
 arch/riscv/kernel/head.S          |  9 ---------
 arch/riscv/kernel/smpboot.c       |  7 +++++++
 arch/riscv/kernel/suspend_entry.S |  2 --
 7 files changed, 40 insertions(+), 25 deletions(-)
 create mode 100644 arch/riscv/include/asm/percpu.h

diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 776354895b81..be4e4e5ac134 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -109,19 +109,13 @@
 	REG_L \dst, 0(\dst)
 .endm
 
-#ifdef CONFIG_SHADOW_CALL_STACK
-/* gp is used as the shadow call stack pointer instead */
-.macro load_global_pointer
+.macro load_pcpu_off_gp tmp
+	REG_L \tmp, TASK_TI_CPU(tp)
+	slli \tmp, \tmp, 3
+	la gp, __per_cpu_offset
+	add gp, gp, \tmp
+	REG_L gp, 0(gp)
 .endm
-#else
-/* load __global_pointer to gp */
-.macro load_global_pointer
-.option push
-.option norelax
-	la gp, __global_pointer$
-.option pop
-.endm
-#endif /* CONFIG_SHADOW_CALL_STACK */
 
 /* save all GPs except x1 ~ x5 */
 .macro save_from_x6_to_x31
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
new file mode 100644
index 000000000000..858d0a93ff14
--- /dev/null
+++ b/arch/riscv/include/asm/percpu.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+static inline void set_my_cpu_offset(unsigned long off)
+{
+	asm volatile("addi gp, %0, 0" :: "r" (off));
+}
+
+static inline unsigned long __kern_my_cpu_offset(void)
+{
+	unsigned long off;
+
+	asm ("mv %0, gp":"=r" (off) :);
+	return off;
+}
+
+#define __my_cpu_offset __kern_my_cpu_offset()
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
+
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index b09ca5f944f7..5cc6d1de4ab4 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -36,6 +36,7 @@ void asm_offsets(void)
 	OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
 	OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
 	OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
+	OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
 	OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
 	OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
 	OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index ac2e908d4418..39d7e66567cf 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -77,8 +77,8 @@ SYM_CODE_START(handle_exception)
 	 */
 	csrw CSR_SCRATCH, x0
 
-	/* Load the global pointer */
-	load_global_pointer
+	/* load __per_cpu_offset[cpu] to gp*/
+	load_pcpu_off_gp t6
 
 	/* Load the kernel shadow call stack pointer if coming from userspace */
 	scs_load_current_if_task_changed s5
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 356d5397b2a2..aa3d22967eef 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -110,9 +110,6 @@ relocate_enable_mmu:
 	la a0, .Lsecondary_park
 	csrw CSR_TVEC, a0
 
-	/* Reload the global pointer */
-	load_global_pointer
-
 	/*
 	 * Switch to kernel page tables. A full fence is necessary in order to
 	 * avoid using the trampoline translations, which are only correct for
@@ -131,9 +128,6 @@ secondary_start_sbi:
 	csrw CSR_IE, zero
 	csrw CSR_IP, zero
 
-	/* Load the global pointer */
-	load_global_pointer
-
 	/*
 	 * Disable FPU & VECTOR to detect illegal usage of
 	 * floating point or vector in kernel space
@@ -228,9 +222,6 @@ SYM_CODE_START(_start_kernel)
 	csrr a0, CSR_MHARTID
 #endif /* CONFIG_RISCV_M_MODE */
 
-	/* Load the global pointer */
-	load_global_pointer
-
 	/*
 	 * Disable FPU & VECTOR to detect illegal usage of
 	 * floating point or vector in kernel space
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 0f8f1c95ac38..844aede75662 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -41,6 +41,11 @@
 
 static DECLARE_COMPLETION(cpu_running);
 
+void __init smp_prepare_boot_cpu(void)
+{
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	int cpuid;
@@ -212,6 +217,8 @@ asmlinkage __visible void smp_callin(void)
 	struct mm_struct *mm = &init_mm;
 	unsigned int curr_cpuid = smp_processor_id();
 
+	set_my_cpu_offset(per_cpu_offset(curr_cpuid));
+
 	if (has_vector()) {
 		/*
 		 * Return as early as possible so the hart with a mismatching
diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S
index 2d54f309c140..0ec850489e0c 100644
--- a/arch/riscv/kernel/suspend_entry.S
+++ b/arch/riscv/kernel/suspend_entry.S
@@ -60,8 +60,6 @@ SYM_FUNC_START(__cpu_suspend_enter)
 SYM_FUNC_END(__cpu_suspend_enter)
 
 SYM_TYPED_FUNC_START(__cpu_resume_enter)
-	/* Load the global pointer */
-	load_global_pointer
 
 #ifdef CONFIG_MMU
 	/* Save A0 and A1 */
-- 
2.39.2