On Tue, Feb 13, 2024 at 7:42 AM Ard Biesheuvel <ardb+git@xxxxxxxxxx> wrote: > > From: Ard Biesheuvel <ardb@xxxxxxxxxx> > > On x86_64, the core kernel is entered in long mode, which implies that > paging is enabled. This means that the CR4.LA57 control bit is > guaranteed to be in sync with the number of paging levels used by the > kernel, and there is no need to store this in a variable. > > There is also no need to use variables for storing the calculations of > pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly. > > This removes the need for two different sources of truth for determining > whether 5-level paging is in use: CR4.LA57 always reflects the actual > state, and never changes from the point of view of the 64-bit core > kernel. The only potential concern is the cost of CR4 accesses, which > can be mitigated using alternatives patching based on feature detection. > > Note that even the decompressor does not manipulate any page tables > before updating CR4.LA57, so it can also avoid the associated global > variables entirely. However, as it does not implement alternatives > patching, the associated ELF sections need to be discarded. > > Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx> > --- > arch/x86/boot/compressed/misc.h | 4 -- > arch/x86/boot/compressed/pgtable_64.c | 12 ---- > arch/x86/boot/compressed/vmlinux.lds.S | 1 + > arch/x86/include/asm/pgtable_64_types.h | 58 ++++++++------------ > arch/x86/kernel/cpu/common.c | 2 - > arch/x86/kernel/head64.c | 33 +---------- > arch/x86/mm/kasan_init_64.c | 3 - > arch/x86/mm/mem_encrypt_identity.c | 9 --- > 8 files changed, 27 insertions(+), 95 deletions(-) > > diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h > index bc2f0f17fb90..2b15ddd0e177 100644 > --- a/arch/x86/boot/compressed/misc.h > +++ b/arch/x86/boot/compressed/misc.h > @@ -16,9 +16,6 @@ > > #define __NO_FORTIFY > > -/* cpu_feature_enabled() cannot be used this early */ > -#define USE_EARLY_PGTABLE_L5 > - > /* > * Boot stub deals with identity mappings, physical and virtual addresses are > * the same, so override these defines. > @@ -178,7 +175,6 @@ static inline int count_immovable_mem_regions(void) { return 0; } > #endif > > /* ident_map_64.c */ > -extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; > extern void kernel_add_identity_map(unsigned long start, unsigned long end); > > /* Used by PAGE_KERN* macros: */ > diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c > index 51f957b24ba7..ae72f53f5e77 100644 > --- a/arch/x86/boot/compressed/pgtable_64.c > +++ b/arch/x86/boot/compressed/pgtable_64.c > @@ -9,13 +9,6 @@ > #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ > #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ > > -#ifdef CONFIG_X86_5LEVEL > -/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */ > -unsigned int __section(".data") __pgtable_l5_enabled; > -unsigned int __section(".data") pgdir_shift = 39; > -unsigned int __section(".data") ptrs_per_p4d = 1; > -#endif > - > /* Buffer to preserve trampoline memory */ > static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; > > @@ -125,11 +118,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) > native_cpuid_eax(0) >= 7 && > (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { > l5_required = true; > - > - /* Initialize variables for 5-level paging */ > - __pgtable_l5_enabled = 1; > - pgdir_shift = 48; > - ptrs_per_p4d = 512; > } > > /* > diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S > index 083ec6d7722a..06358bb067fe 100644 > --- a/arch/x86/boot/compressed/vmlinux.lds.S > +++ b/arch/x86/boot/compressed/vmlinux.lds.S > @@ -81,6 +81,7 @@ SECTIONS > *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss) > *(.hash) *(.gnu.hash) > *(.note.*) > + *(.altinstructions .altinstr_replacement) > } > > .got.plt (INFO) : { > diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h > index 38b54b992f32..6a57bfdff52b 100644 > --- a/arch/x86/include/asm/pgtable_64_types.h > +++ b/arch/x86/include/asm/pgtable_64_types.h > @@ -6,7 +6,10 @@ > > #ifndef __ASSEMBLY__ > #include <linux/types.h> > +#include <asm/alternative.h> > +#include <asm/cpufeatures.h> > #include <asm/kaslr.h> > +#include <asm/processor-flags.h> > > /* > * These are used to make use of C type-checking.. > @@ -21,63 +24,50 @@ typedef unsigned long pgprotval_t; > typedef struct { pteval_t pte; } pte_t; > typedef struct { pmdval_t pmd; } pmd_t; > > -#ifdef CONFIG_X86_5LEVEL > -extern unsigned int __pgtable_l5_enabled; > - > -#ifdef USE_EARLY_PGTABLE_L5 > -/* > - * cpu_feature_enabled() is not available in early boot code. > - * Use variable instead. > - */ > -static inline bool pgtable_l5_enabled(void) > +static __always_inline __pure bool pgtable_l5_enabled(void) > { > - return __pgtable_l5_enabled; > -} > -#else > -#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) > -#endif /* USE_EARLY_PGTABLE_L5 */ > + unsigned long r; > + bool ret; > > -#else > -#define pgtable_l5_enabled() 0 > -#endif /* CONFIG_X86_5LEVEL */ > + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) > + return false; > + > + asm(ALTERNATIVE_TERNARY( > + "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c), > + %P[feat], "stc", "clc") > + : [reg] "=&r" (r), CC_OUT(c) (ret) > + : [feat] "i" (X86_FEATURE_LA57), > + [la57] "i" (X86_CR4_LA57_BIT) > + : "cc"); This should be more like _static_cpu_has(), where the runtime test is out of line in a discardable section, and the inline part is just a JMP or NOP. Brian Gerst