The following commit has been merged into the x86/asm branch of tip:

Commit-ID:     9d7de2aa8b41407bc96d89a80dc1fd637d389d42
Gitweb:        https://git.kernel.org/tip/9d7de2aa8b41407bc96d89a80dc1fd637d389d42
Author:        Brian Gerst <brgerst@xxxxxxxxx>
AuthorDate:    Thu, 23 Jan 2025 14:07:40 -05:00
Committer:     Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Tue, 18 Feb 2025 10:15:27 +01:00

x86/percpu/64: Use relative percpu offsets

The percpu section is currently linked at absolute address 0, because
older compilers hard-coded the stack protector canary value at a fixed
offset from the start of the GS segment.  Now that the canary is a
normal percpu variable, the percpu section does not need to be linked
at a specific address.

x86-64 will now calculate the percpu offsets as the delta between the
initial percpu address and the dynamically allocated memory, like other
architectures.

Note that GSBASE is limited to the canonical address width (48 or 57
bits, sign-extended).  As long as the kernel text, modules, and the
dynamically allocated percpu memory are all in the negative address
space, the delta will not overflow this limit.

Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Reviewed-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
Reviewed-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20250123190747.745588-9-brgerst@xxxxxxxxx
---
 arch/x86/include/asm/processor.h |  6 +++++-
 arch/x86/kernel/head_64.S        | 19 +++++++++----------
 arch/x86/kernel/setup_percpu.c   | 12 ++----------
 arch/x86/kernel/vmlinux.lds.S    | 29 +----------------------------
 arch/x86/platform/pvh/head.S     |  5 ++---
 arch/x86/tools/relocs.c          | 10 +++-------
 arch/x86/xen/xen-head.S          |  9 ++++-----
 init/Kconfig                     |  2 +-
 8 files changed, 27 insertions(+), 65 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a468712..b8fee88 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -431,7 +431,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);
 
 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 {
-	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#ifdef CONFIG_SMP
+	return per_cpu_offset(cpu);
+#else
+	return 0;
+#endif
 }
 
 extern asmlinkage void entry_SYSCALL32_ignore(void);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c3d73c0..2843b0a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
 	/* Set up the stack for verify_cpu() */
 	leaq	__top_init_kernel_stack(%rip), %rsp
 
-	/* Setup GSBASE to allow stack canary access for C code */
+	/*
+	 * Set up GSBASE.
+	 * Note that on SMP the boot CPU uses the init data section until
+	 * the per-CPU areas are set up.
+	 */
 	movl	$MSR_GS_BASE, %ecx
-	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-	movl	%edx, %eax
-	shrq	$32, %rdx
+	xorl	%eax, %eax
+	xorl	%edx, %edx
 	wrmsr
 
 	call	startup_64_setup_gdt_idt
@@ -359,16 +362,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
 	movl	%eax,%fs
 	movl	%eax,%gs
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.
+	/*
+	 * Set up GSBASE.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-#ifndef CONFIG_SMP
-	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-#endif
 	movl	%edx, %eax
 	shrq	$32, %rdx
 	wrmsr
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b30d6e1..1e7be94 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -23,18 +23,10 @@
 #include <asm/cpumask.h>
 #include <asm/cpu.h>
 
-#ifdef CONFIG_X86_64
-#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
-#else
-#define BOOT_PERCPU_OFFSET 0
-#endif
-
-DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
-	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
-};
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
 EXPORT_SYMBOL(__per_cpu_offset);
 
 /*
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0deb488..8a59851 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -112,12 +112,6 @@ ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX
 
 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
 	data PT_LOAD FLAGS(6);          /* RW_ */
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_SMP
-	percpu PT_LOAD FLAGS(6);        /* RW_ */
-#endif
-	init PT_LOAD FLAGS(7);          /* RWE */
-#endif
 	note PT_NOTE FLAGS(0);          /* ___ */
 }
@@ -216,21 +210,7 @@ SECTIONS
 		__init_begin = .; /* paired with __init_end */
 	}
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
-	/*
-	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-	 * output PHDR, so the next output section - .init.text - should
-	 * start another segment - init.
-	 */
-	PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
-	ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
-	       "per-CPU data too large - increase CONFIG_PHYSICAL_START")
-#endif
-
 	INIT_TEXT_SECTION(PAGE_SIZE)
-#ifdef CONFIG_X86_64
-	:init
-#endif
 
 	/*
 	 * Section for code used exclusively before alternatives are run. All
@@ -347,9 +327,7 @@ SECTIONS
 		EXIT_DATA
 	}
 
-#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
 	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
-#endif
 
 	RUNTIME_CONST_VARIABLES
 	RUNTIME_CONST(ptr, USER_PTR_MAX)
@@ -497,16 +475,11 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
  * Per-cpu symbols which need to be offset from __per_cpu_load
  * for the boot processor.
  */
-#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
 INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(fixed_percpu_data);
 INIT_PER_CPU(irq_stack_backing_store);
 
-#ifdef CONFIG_SMP
-. = ASSERT((fixed_percpu_data == 0),
-	   "fixed_percpu_data is not at start of per-cpu area");
-#endif
-
 #ifdef CONFIG_MITIGATION_UNRET_ENTRY
 . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
 #endif
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 723f181..cfa18ec 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -179,9 +179,8 @@ SYM_CODE_START(pvh_start_xen)
 	 * the per-CPU areas are set up.
 	 */
 	movl $MSR_GS_BASE,%ecx
-	leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-	movq %edx, %eax
-	shrq $32, %rdx
+	xorl %eax, %eax
+	xorl %edx, %edx
 	wrmsr
 
 	/* Call xen_prepare_pvh() via the kernel virtual mapping */
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 92a1e50..3cb3b30 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -835,12 +835,7 @@ static void percpu_init(void)
  */
 static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
 {
-	int shndx = sym_index(sym);
-
-	return (shndx == per_cpu_shndx) &&
-		strcmp(symname, "__init_begin") &&
-		strcmp(symname, "__per_cpu_load") &&
-		strncmp(symname, "init_per_cpu_", 13);
+	return 0;
 }
 
 
@@ -1062,7 +1057,8 @@ static int cmp_relocs(const void *va, const void *vb)
 
 static void sort_relocs(struct relocs *r)
 {
-	qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+	if (r->count)
+		qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
 }
 
 static int write32(uint32_t v, FILE *f)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index a31b057..5ccb4c5 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -31,15 +31,14 @@ SYM_CODE_START(startup_xen)
 
 	leaq	__top_init_kernel_stack(%rip), %rsp
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.
+	/*
+	 * Set up GSBASE.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
-	cdq
+	xorl	%eax, %eax
+	xorl	%edx, %edx
 	wrmsr
 
 	mov	%rsi, %rdi
diff --git a/init/Kconfig b/init/Kconfig
index d0d021b..b5d9c0f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1872,7 +1872,7 @@ config KALLSYMS_ALL
 
 config KALLSYMS_ABSOLUTE_PERCPU
 	bool
 	depends on KALLSYMS
-	default X86_64 && SMP
+	default n
 
 # end of the "standard kernel features (expert users)" menu
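
To make the "delta between the initial percpu address and the dynamically
allocated memory" concrete, here is a minimal sketch of the offset
calculation as it works after this patch. It assumes the usual players
from mm/percpu.c and arch/x86/kernel/setup_percpu.c (pcpu_base_addr,
pcpu_unit_offsets[], __per_cpu_start, per_cpu_offset()); the wrapper
function itself is illustrative, not code from this patch:

	/* Illustrative only: how the relative offsets are derived. */
	static void __init set_percpu_offsets(void)
	{
		unsigned long delta;
		unsigned int cpu;

		/*
		 * Distance between where .data..percpu was linked in the
		 * kernel image and where the percpu allocator actually
		 * placed the first chunk.
		 */
		delta = (unsigned long)pcpu_base_addr -
			(unsigned long)__per_cpu_start;

		/* Each CPU's offset is that delta plus its unit offset. */
		for_each_possible_cpu(cpu)
			per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
	}

With the percpu section no longer linked at address 0, this is the same
scheme every other architecture uses; the offset is also what
cpu_kernelmode_gs_base() now returns and what gets written to
MSR_GS_BASE.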
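On the canonical-address note: a worked example, with made-up but
representative 4-level (48-bit) addresses, shows why keeping everything
in the negative address space keeps the delta usable as a GSBASE value:

	pcpu_base_addr  = 0xffff888000000000	(direct map)
	__per_cpu_start = 0xffffffff82000000	(kernel image)

	offset = 0xffff888000000000 - 0xffffffff82000000
	       = 0xffff88807e000000		(mod 2^64)

Bits 63:47 of the result are all ones, so it is still a canonical,
sign-extended value and can be written to MSR_GS_BASE. If either
address were in the positive half of the address space, the subtraction
could produce a non-canonical value and the WRMSR would fault.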