In the preparatory stage of CPU hotplug, the per-CPU pvclock data pointer is
assigned either an element of the static array or dynamically allocated
memory. Currently, the dynamically allocated memory is not mapped decrypted.
However, when SEV is active this memory range must be mapped decrypted.

The C-bit determines the encryption status of a 4K page, hence a full 4K page
allocation would be required to store a single 32-byte pvclock variable. This
could waste a fairly sizeable amount of memory, since each CPU would perform a
separate 4K allocation.

Instead, define a second static array which is used when SEV is active. This
array is placed in the new .data..decrypted.aux section, which the linker
script lays out inside the .data..decrypted range so that it is mapped
decrypted during boot. Since the second array is needed only when memory
encryption is active, it would otherwise leave a big unused chunk in the
.data..decrypted region; free it when memory encryption is not active.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Suggested-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
---
 arch/x86/include/asm/mem_encrypt.h |  4 ++++
 arch/x86/kernel/kvmclock.c         | 14 ++++++++++++++
 arch/x86/kernel/vmlinux.lds.S      |  3 +++
 arch/x86/mm/init.c                 |  3 +++
 arch/x86/mm/mem_encrypt.c          | 10 ++++++++++
 5 files changed, 34 insertions(+)
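As a reading aid, the fragment below condenses how the pieces of this patch
fit together once applied. It is illustrative only, not compilable on its own
and not part of the diff; the #ifdef CONFIG_AMD_MEM_ENCRYPT guards are omitted,
and all names (hv_clock_boot, HVC_BOOT_ARRAY_SIZE, sev_active(), etc.) are the
ones already used in the diff or the existing kvmclock code.

	/*
	 * hv_clock_aux lands between __start_data_decrypted_aux and
	 * __end_data_decrypted, so it is covered by the decrypted
	 * mapping created at boot when SEV is active.
	 */
	static struct pvclock_vsyscall_time_info
			hv_clock_aux[NR_CPUS] __decrypted_aux;

	/* kvmclock_setup_percpu() then picks the backing page: */
	if (cpu < HVC_BOOT_ARRAY_SIZE)
		p = &hv_clock_boot[cpu];		/* static boot array   */
	else if (sev_active())
		p = &hv_clock_aux[cpu - HVC_BOOT_ARRAY_SIZE]; /* pre-decrypted */
	else
		p = kzalloc(sizeof(*p), GFP_KERNEL);	/* non-SEV: allocate   */

	/*
	 * When memory encryption is not active, free_initmem() calls
	 * mem_encrypt_free_decrypted_mem(), which returns the unused
	 * [__start_data_decrypted_aux, __end_data_decrypted) range to
	 * the page allocator via free_init_pages().
	 */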
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 802b2eb..3f2a5e3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,11 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
 
 bool sme_active(void);
 bool sev_active(void);
 
 #define __decrypted __attribute__((__section__(".data..decrypted")))
+#define __decrypted_aux __attribute__((__section__(".data..decrypted.aux")))
 
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
@@ -80,6 +82,7 @@ static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 
 #define __decrypted
+#define __decrypted_aux
 
 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
 
@@ -93,6 +96,7 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 #define __sme_pa_nodebug(x)	(__pa_nodebug(x) | sme_me_mask)
 
 extern char __start_data_decrypted[], __end_data_decrypted[];
+extern char __start_data_decrypted_aux[];
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0b3110b..9d8bad5 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -65,6 +65,15 @@ static struct pvclock_vsyscall_time_info
 static struct pvclock_wall_clock wall_clock __decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/*
+ * The auxiliary array will be used when SEV is active. In non-SEV case,
+ * it will be freed by mem_encrypt_free_decrypted_mem().
+ */
+static struct pvclock_vsyscall_time_info
+			hv_clock_aux[NR_CPUS] __decrypted_aux;
+#endif
+
 static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
 	return &this_cpu_read(hv_clock_per_cpu)->pvti;
@@ -269,6 +278,11 @@ static int kvmclock_setup_percpu(unsigned int cpu)
 	/* Use the static page for the first CPUs, allocate otherwise */
 	if (cpu < HVC_BOOT_ARRAY_SIZE)
 		p = &hv_clock_boot[cpu];
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	/* Use the static page from auxiliary array instead of allocating it. */
+	else if (sev_active())
+		p = &hv_clock_aux[cpu - HVC_BOOT_ARRAY_SIZE];
+#endif
 	else
 		p = kzalloc(sizeof(*p), GFP_KERNEL);
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index ae8153e..b78e117 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -77,6 +77,9 @@ jiffies_64 = jiffies;
 	. = ALIGN(PMD_SIZE);				\
 	__start_data_decrypted = .;			\
 	*(.data..decrypted);				\
+	. = ALIGN(PAGE_SIZE);				\
+	__start_data_decrypted_aux = .;			\
+	*(.data..decrypted.aux);			\
 	. = ALIGN(PMD_SIZE);				\
 	__end_data_decrypted = .;			\
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26..b3cc33d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,9 +815,12 @@ void free_kernel_image_pages(void *begin, void *end)
 	set_memory_np_noalias(begin_ul, len_pages);
 }
 
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
 void __ref free_initmem(void)
 {
 	e820__reallocate_tables();
+	mem_encrypt_free_decrypted_mem();
 
 	free_kernel_image_pages(&__init_begin, &__init_end);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398..f1ab7f5 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,16 @@ bool sev_active(void)
 EXPORT_SYMBOL(sev_active);
 
 /* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+	if (mem_encrypt_active())
+		return;
+
+	free_init_pages("unused decrypted",
+			(unsigned long)__start_data_decrypted_aux,
+			(unsigned long)__end_data_decrypted);
+}
+
 void __init mem_encrypt_init(void)
 {
 	if (!sme_me_mask)
-- 
2.7.4