From: Tom Lendacky <thomas.lendacky@xxxxxxx>

Provide initial support for accessing the GHCB when needing to access
registers for an SEV-ES guest. The support consists of:

  - Accessing the GHCB instead of the VMSA when reading and writing
    guest registers (after the VMSA has been encrypted).
  - Creating register access override functions for reading and writing
    guest registers from the common KVM support.
  - Allocating pages for the VMSA and GHCB when creating each vCPU
    - The VMSA page holds the encrypted VMSA for the vCPU
    - The GHCB page is used to hold a copy of the guest GHCB during
      VMGEXIT processing.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
 arch/x86/include/asm/kvm_host.h  |   7 ++
 arch/x86/include/asm/msr-index.h |   1 +
 arch/x86/kvm/kvm_cache_regs.h    |  30 +++++--
 arch/x86/kvm/svm/svm.c           | 138 ++++++++++++++++++++++++++++++-
 arch/x86/kvm/svm/svm.h           |  65 ++++++++++++++-
 5 files changed, 230 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5303dbc5c9bc..c900992701d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -788,6 +788,9 @@ struct kvm_vcpu_arch {
 
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
+
+	/* SEV-ES support */
+	bool vmsa_encrypted;
 };
 
 struct kvm_lpage_info {
@@ -1227,6 +1230,10 @@ struct kvm_x86_ops {
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+	void (*reg_read_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+	void (*reg_write_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg,
+				   unsigned long val);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 249a4147c4b2..16f5b20bb099 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -466,6 +466,7 @@
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
 #define MSR_AMD64_IBSOPDATA4		0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH		0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB		0xc0010130
 #define MSR_AMD64_SEV			0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT	0
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index cfe83d4ae625..e87eb90999d5 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,15 +9,21 @@
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
 	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
 
-#define BUILD_KVM_GPR_ACCESSORS(lname, uname)				      \
-static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
-{									      \
-	return vcpu->arch.regs[VCPU_REGS_##uname];			      \
-}									      \
-static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,	      \
-						unsigned long val)	      \
-{									      \
-	vcpu->arch.regs[VCPU_REGS_##uname] = val;			      \
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname)					\
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)	\
+{										\
+	if (kvm_x86_ops.reg_read_override)					\
+		kvm_x86_ops.reg_read_override(vcpu, VCPU_REGS_##uname);	\
+										\
+	return vcpu->arch.regs[VCPU_REGS_##uname];				\
+}										\
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,		\
+						unsigned long val)		\
+{										\
+	if (kvm_x86_ops.reg_write_override)					\
+		kvm_x86_ops.reg_write_override(vcpu, VCPU_REGS_##uname, val);	\
+										\
+	vcpu->arch.regs[VCPU_REGS_##uname] = val;				\
 }
 BUILD_KVM_GPR_ACCESSORS(rax, RAX)
 BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -67,6 +73,9 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
 	if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
 		return 0;
 
+	if (kvm_x86_ops.reg_read_override)
+		kvm_x86_ops.reg_read_override(vcpu, reg);
+
 	if (!kvm_register_is_available(vcpu, reg))
 		kvm_x86_ops.cache_reg(vcpu, reg);
 
@@ -79,6 +88,9 @@ static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg,
 	if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
 		return;
 
+	if (kvm_x86_ops.reg_write_override)
+		kvm_x86_ops.reg_write_override(vcpu, reg, val);
+
 	vcpu->arch.regs[reg] = val;
 	kvm_register_mark_dirty(vcpu, reg);
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 779c167e42cc..d1f52211627a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1175,6 +1175,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	struct page *msrpm_pages;
 	struct page *hsave_page;
 	struct page *nested_msrpm_pages;
+	struct page *vmsa_page = NULL;
 	int err;
 
 	BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1197,9 +1198,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	if (!hsave_page)
 		goto free_page3;
 
+	if (sev_es_guest(svm->vcpu.kvm)) {
+		/*
+		 * SEV-ES guests require a separate VMSA page used to contain
+		 * the encrypted register state of the guest.
+		 */
+		vmsa_page = alloc_page(GFP_KERNEL);
+		if (!vmsa_page)
+			goto free_page4;
+	}
+
 	err = avic_init_vcpu(svm);
 	if (err)
-		goto free_page4;
+		goto free_page5;
 
 	/* We initialize this flag to true to make sure that the is_running
 	 * bit would be set the first time the vcpu is loaded.
@@ -1219,6 +1230,12 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
+
+	if (vmsa_page) {
+		svm->vmsa = page_address(vmsa_page);
+		clear_page(svm->vmsa);
+	}
+
 	svm->asid_generation = 0;
 	init_vmcb(svm);
 
@@ -1227,6 +1244,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
 	return 0;
 
+free_page5:
+	if (vmsa_page)
+		__free_page(vmsa_page);
 free_page4:
 	__free_page(hsave_page);
 free_page3:
@@ -1258,6 +1278,26 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	 */
 	svm_clear_current_vmcb(svm->vmcb);
 
+	if (sev_es_guest(vcpu->kvm)) {
+		struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+
+		if (vcpu->arch.vmsa_encrypted) {
+			u64 page_to_flush;
+
+			/*
+			 * The VMSA page was used by hardware to hold guest
+			 * encrypted state, be sure to flush it before returning
+			 * it to the system. This is done using the VM Page
+			 * Flush MSR (which takes the page virtual address and
+			 * guest ASID).
+			 */
+			page_to_flush = (u64)svm->vmsa | sev->asid;
+			wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, page_to_flush);
+		}
+
+		__free_page(virt_to_page(svm->vmsa));
+	}
+
 	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
@@ -4012,6 +4052,99 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 		(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
 }
 
+/*
+ * These return values represent the offset in quad words within the VM save
+ * area. This allows them to be accessed by casting the save area to a u64
+ * array.
+ */
+#define VMSA_REG_ENTRY(_field)	(offsetof(struct vmcb_save_area, _field) / sizeof(u64))
+#define VMSA_REG_UNDEF		VMSA_REG_ENTRY(valid_bitmap)
+static inline unsigned int vcpu_to_vmsa_entry(enum kvm_reg reg)
+{
+	switch (reg) {
+	case VCPU_REGS_RAX:	return VMSA_REG_ENTRY(rax);
+	case VCPU_REGS_RBX:	return VMSA_REG_ENTRY(rbx);
+	case VCPU_REGS_RCX:	return VMSA_REG_ENTRY(rcx);
+	case VCPU_REGS_RDX:	return VMSA_REG_ENTRY(rdx);
+	case VCPU_REGS_RSP:	return VMSA_REG_ENTRY(rsp);
+	case VCPU_REGS_RBP:	return VMSA_REG_ENTRY(rbp);
+	case VCPU_REGS_RSI:	return VMSA_REG_ENTRY(rsi);
+	case VCPU_REGS_RDI:	return VMSA_REG_ENTRY(rdi);
+#ifdef CONFIG_X86_64
+	case VCPU_REGS_R8:	return VMSA_REG_ENTRY(r8);
+	case VCPU_REGS_R9:	return VMSA_REG_ENTRY(r9);
+	case VCPU_REGS_R10:	return VMSA_REG_ENTRY(r10);
+	case VCPU_REGS_R11:	return VMSA_REG_ENTRY(r11);
+	case VCPU_REGS_R12:	return VMSA_REG_ENTRY(r12);
+	case VCPU_REGS_R13:	return VMSA_REG_ENTRY(r13);
+	case VCPU_REGS_R14:	return VMSA_REG_ENTRY(r14);
+	case VCPU_REGS_R15:	return VMSA_REG_ENTRY(r15);
+#endif
+	case VCPU_REGS_RIP:	return VMSA_REG_ENTRY(rip);
+	default:
+		WARN_ONCE(1, "unsupported VCPU to VMSA register conversion\n");
+		return VMSA_REG_UNDEF;
+	}
+}
+
+/* For SEV-ES guests, populate the vCPU register from the appropriate VMSA/GHCB */
+static void svm_reg_read_override(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+{
+	struct vmcb_save_area *vmsa;
+	struct vcpu_svm *svm;
+	unsigned int entry;
+	unsigned long val;
+	u64 *vmsa_reg;
+
+	if (!sev_es_guest(vcpu->kvm))
+		return;
+
+	entry = vcpu_to_vmsa_entry(reg);
+	if (entry == VMSA_REG_UNDEF)
+		return;
+
+	svm = to_svm(vcpu);
+	vmsa = get_vmsa(svm);
+	vmsa_reg = (u64 *)vmsa;
+	val = (unsigned long)vmsa_reg[entry];
+
+	/* If a GHCB is mapped, check the bitmap of valid entries */
+	if (svm->ghcb) {
+		if (!test_bit(entry, (unsigned long *)vmsa->valid_bitmap))
+			val = 0;
+	}
+
+	vcpu->arch.regs[reg] = val;
+}
+
+/* For SEV-ES guests, set the vCPU register in the appropriate VMSA */
+static void svm_reg_write_override(struct kvm_vcpu *vcpu, enum kvm_reg reg,
+				   unsigned long val)
+{
+	struct vmcb_save_area *vmsa;
+	struct vcpu_svm *svm;
+	unsigned int entry;
+	u64 *vmsa_reg;
+
+	entry = vcpu_to_vmsa_entry(reg);
+	if (entry == VMSA_REG_UNDEF)
+		return;
+
+	svm = to_svm(vcpu);
+	vmsa = get_vmsa(svm);
+	vmsa_reg = (u64 *)vmsa;
+
+	/* If a GHCB is mapped, set the bit to indicate a valid entry */
+	if (svm->ghcb) {
+		unsigned int index = entry / 8;
+		unsigned int shift = entry % 8;
+
+		vmsa->valid_bitmap[index] |= BIT(shift);
+	}
+
+	vmsa_reg[entry] = val;
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
@@ -4150,6 +4283,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+	.reg_read_override = svm_reg_read_override,
+	.reg_write_override = svm_reg_write_override,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f42ba9d158df..ff587536f571 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -159,6 +159,10 @@ struct vcpu_svm {
 	 */
 	struct list_head ir_list;
 	spinlock_t ir_list_lock;
+
+	/* SEV-ES support */
+	struct vmcb_save_area *vmsa;
+	struct ghcb *ghcb;
 };
 
 struct svm_cpu_data {
@@ -509,9 +513,34 @@ void sev_hardware_teardown(void);
 
 static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
 {
-	return &svm->vmcb->save;
+	struct vmcb_save_area *vmsa;
+
+	if (sev_es_guest(svm->vcpu.kvm)) {
+		/*
+		 * Before LAUNCH_UPDATE_VMSA, use the actual SEV-ES save area
+		 * to construct the initial state. Afterwards, use the mapped
+		 * GHCB in a VMGEXIT or the traditional save area as a scratch
+		 * area when outside of a VMGEXIT.
+		 */
+		if (svm->vcpu.arch.vmsa_encrypted) {
+			if (svm->ghcb)
+				vmsa = &svm->ghcb->save;
+			else
+				vmsa = &svm->vmcb->save;
+		} else {
+			vmsa = svm->vmsa;
+		}
+	} else {
+		vmsa = &svm->vmcb->save;
+	}
+
+	return vmsa;
 }
 
+#define SEV_ES_SET_VALID(_vmsa, _field)					\
+	__set_bit(GHCB_BITMAP_IDX(_field),				\
+		  (unsigned long *)(_vmsa)->valid_bitmap)
+
 #define DEFINE_VMSA_SEGMENT_ENTRY(_field, _entry, _size)		\
 static inline _size							\
 svm_##_field##_read_##_entry(struct vcpu_svm *svm)			\
@@ -528,6 +557,9 @@ static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
 	struct vmcb_save_area *vmsa = get_vmsa(svm);			\
 									\
 	vmsa->_field._entry = value;					\
+	if (svm->vcpu.arch.vmsa_encrypted) {				\
+		SEV_ES_SET_VALID(vmsa, _field);				\
+	}								\
 }									\
 
 #define DEFINE_VMSA_SEGMENT_ACCESSOR(_field)				\
@@ -551,6 +583,9 @@ static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
 	struct vmcb_save_area *vmsa = get_vmsa(svm);			\
 									\
 	vmsa->_field = *seg;						\
+	if (svm->vcpu.arch.vmsa_encrypted) {				\
+		SEV_ES_SET_VALID(vmsa, _field);				\
+	}								\
 }
 
 DEFINE_VMSA_SEGMENT_ACCESSOR(cs)
@@ -579,6 +614,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
 	struct vmcb_save_area *vmsa = get_vmsa(svm);			\
 									\
 	vmsa->_field = value;						\
+	if (svm->vcpu.arch.vmsa_encrypted) {				\
+		SEV_ES_SET_VALID(vmsa, _field);				\
+	}								\
 }									\
 									\
 static inline void							\
@@ -587,6 +625,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
 	struct vmcb_save_area *vmsa = get_vmsa(svm);			\
 									\
 	vmsa->_field &= value;						\
+	if (svm->vcpu.arch.vmsa_encrypted) {				\
+		SEV_ES_SET_VALID(vmsa, _field);				\
+	}								\
 }									\
 									\
 static inline void							\
@@ -595,6 +636,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
 	struct vmcb_save_area *vmsa = get_vmsa(svm);			\
 									\
 	vmsa->_field |= value;						\
+	if (svm->vcpu.arch.vmsa_encrypted) {				\
+		SEV_ES_SET_VALID(vmsa, _field);				\
+	}								\
 }
 
 #define DEFINE_VMSA_ACCESSOR(_field)					\
@@ -629,6 +673,25 @@ DEFINE_VMSA_ACCESSOR(last_excp_to)
 DEFINE_VMSA_U8_ACCESSOR(cpl)
 DEFINE_VMSA_ACCESSOR(rip)
 DEFINE_VMSA_ACCESSOR(rax)
+DEFINE_VMSA_ACCESSOR(rbx)
+DEFINE_VMSA_ACCESSOR(rcx)
+DEFINE_VMSA_ACCESSOR(rdx)
 DEFINE_VMSA_ACCESSOR(rsp)
+DEFINE_VMSA_ACCESSOR(rbp)
+DEFINE_VMSA_ACCESSOR(rsi)
+DEFINE_VMSA_ACCESSOR(rdi)
+DEFINE_VMSA_ACCESSOR(r8)
+DEFINE_VMSA_ACCESSOR(r9)
+DEFINE_VMSA_ACCESSOR(r10)
+DEFINE_VMSA_ACCESSOR(r11)
+DEFINE_VMSA_ACCESSOR(r12)
+DEFINE_VMSA_ACCESSOR(r13)
+DEFINE_VMSA_ACCESSOR(r14)
+DEFINE_VMSA_ACCESSOR(r15)
+DEFINE_VMSA_ACCESSOR(sw_exit_code)
+DEFINE_VMSA_ACCESSOR(sw_exit_info_1)
+DEFINE_VMSA_ACCESSOR(sw_exit_info_2)
+DEFINE_VMSA_ACCESSOR(sw_scratch)
+DEFINE_VMSA_ACCESSOR(xcr0)
 
 #endif
-- 
2.28.0
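
P.S. (review aid only, not part of the patch): below is a minimal, self-contained user-space sketch of the optional read/write override dispatch that the kvm_cache_regs.h hunks above add to the common register accessors. All names here (reg_ops, demo_read_override, backing_store, etc.) are simplified stand-ins invented for illustration, not KVM code; the point is only that the hook runs before the cached register array is touched, so vendors that leave the hooks NULL see no behavioural change.

/*
 * Sketch of the reg_read_override/reg_write_override dispatch pattern.
 * Build with: cc -Wall sketch.c && ./a.out
 */
#include <stdio.h>
#include <stdint.h>

enum reg { REG_RAX, REG_RBX, NR_REGS };

/* Simplified stand-in for the new kvm_x86_ops hooks. */
struct reg_ops {
	void (*reg_read_override)(enum reg r);
	void (*reg_write_override)(enum reg r, unsigned long val);
};

static unsigned long cached_regs[NR_REGS];	/* analogue of vcpu->arch.regs[] */
static uint64_t backing_store[NR_REGS];		/* analogue of the VMSA/GHCB save area */
static unsigned long valid_bitmap;		/* analogue of the GHCB valid-entry bitmap */

/* Backend read hook: refresh the cache from the backing store, or 0 if not valid. */
static void demo_read_override(enum reg r)
{
	cached_regs[r] = (valid_bitmap & (1UL << r)) ? backing_store[r] : 0;
}

/* Backend write hook: mirror the value into the backing store and mark it valid. */
static void demo_write_override(enum reg r, unsigned long val)
{
	backing_store[r] = val;
	valid_bitmap |= 1UL << r;
}

static struct reg_ops ops = {
	.reg_read_override  = demo_read_override,
	.reg_write_override = demo_write_override,
};

/* Common accessors: consult the optional hook, then use the cached array. */
static unsigned long reg_read(enum reg r)
{
	if (ops.reg_read_override)
		ops.reg_read_override(r);
	return cached_regs[r];
}

static void reg_write(enum reg r, unsigned long val)
{
	if (ops.reg_write_override)
		ops.reg_write_override(r, val);
	cached_regs[r] = val;
}

int main(void)
{
	printf("rax before any write: %lu\n", reg_read(REG_RAX)); /* 0: no valid entry yet */
	reg_write(REG_RAX, 42);
	printf("rax after write:      %lu\n", reg_read(REG_RAX)); /* 42: entry marked valid */
	return 0;
}

Because the write hook marks the entry valid and the read hook honours that bitmap, a read after a write returns the stored value, which mirrors how the SVM overrides in this patch consult and update the GHCB valid bitmap.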