If VM-entry fails due to VM-entry MSR-loading, the MSRs of the nested
guest need to be rolled back to their previous values so that the guest
is not left in an inconsistent state.

Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx>
Reviewed-by: Karl Heubaum <karl.heubaum@xxxxxxxxxx>
---
Note for reviewers: a standalone sketch of the save/rollback pattern
used here is appended after the patch.

 arch/x86/kvm/vmx/nested.c | 50 +++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/vmx/nested.h | 26 +++++++++++++++-----
 arch/x86/kvm/vmx/vmx.h    |  8 +++++++
 3 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index cebdcb105ea8..bd8e7af5c1e5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -191,7 +191,7 @@ static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
-static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
+void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
 {
 	/* TODO: not to reset guest simply here. */
 	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
@@ -894,11 +894,13 @@ static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
  * as possible, process all valid entries before failing rather than precheck
  * for a capacity violation.
  */
-static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
+static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count,
+			       bool save)
 {
 	u32 i;
 	struct vmx_msr_entry e;
 	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	for (i = 0; i < count; i++) {
 		if (unlikely(i >= max_msr_list_size))
@@ -917,6 +919,16 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 				__func__, i, e.index, e.reserved);
 			goto fail;
 		}
+		if (save) {
+			vmx->nested.vm_entry_msr_load_backup[i].index = e.index;
+			if (kvm_get_msr(vcpu, e.index,
+			    &(vmx->nested.vm_entry_msr_load_backup[i].data))) {
+				pr_debug_ratelimited(
+					"%s cannot read MSR (%u, 0x%x)\n",
+					__func__, i, e.index);
+				goto fail;
+			}
+		}
 		if (kvm_set_msr(vcpu, e.index, e.value)) {
 			pr_debug_ratelimited(
 				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
@@ -926,6 +938,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 	}
 	return 0;
 fail:
+	kfree(vmx->nested.vm_entry_msr_load_backup);
 	return i + 1;
 }
 
@@ -973,6 +986,26 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 	return 0;
 }
 
+int nested_vmx_rollback_msr(struct kvm_vcpu *vcpu, u32 count)
+{
+	u32 i;
+	struct msr_data msr;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	for (i = 0; i < count; i++) {
+		msr.host_initiated = false;
+		msr.index = vmx->nested.vm_entry_msr_load_backup[i].index;
+		msr.data = vmx->nested.vm_entry_msr_load_backup[i].data;
+		if (kvm_set_msr(vcpu, msr.index, msr.data)) {
+			pr_debug_ratelimited(
+				"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
+				__func__, i, msr.index, msr.data);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	unsigned long invalid_mask;
@@ -3102,9 +3135,18 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 		goto vmentry_fail_vmexit_guest_mode;
 
 	if (from_vmentry) {
+		u32 count = vmcs12->vm_entry_msr_load_count;
+
+		/* Save guest MSRs before we load them */
+		vmx->nested.vm_entry_msr_load_backup =
+			kcalloc(count, sizeof(struct msr_data), GFP_KERNEL_ACCOUNT);
+		if (!vmx->nested.vm_entry_msr_load_backup)
+			goto vmentry_fail_vmexit_guest_mode;
+
 		exit_qual = nested_vmx_load_msr(vcpu,
					vmcs12->vm_entry_msr_load_addr,
-					vmcs12->vm_entry_msr_load_count);
+					vmcs12->vm_entry_msr_load_count,
+					true);
 		if (exit_qual) {
 			/*
 			 * According to section “VM Entries” in Intel SDM
@@ -3940,7 +3982,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vmx_update_msr_bitmap(vcpu);
 
 	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
-				vmcs12->vm_exit_msr_load_count))
+				vmcs12->vm_exit_msr_load_count, false))
 		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
 }
 
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index bb51ec8cf7da..f951b2b338d2 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,6 +17,8 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
 bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
 void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		       u32 exit_intr_info, unsigned long exit_qualification);
+void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator);
+int nested_vmx_rollback_msr(struct kvm_vcpu *vcpu, u32 count);
 void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
 int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
@@ -66,7 +68,6 @@ static inline int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu,
 {
 	u32 exit_qual;
 	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	struct vmx_msr_entry *addr;
 
 	/*
 	 * At this point, the exit interruption info in exit_intr_info
@@ -85,11 +86,24 @@ static inline int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu,
 
 	exit_qual = vmcs_readl(EXIT_QUALIFICATION);
 
-	addr = __va(vmcs_read64(VM_ENTRY_MSR_LOAD_ADDR));
-	if (addr && addr->index == MSR_FS_BASE &&
-	    (exit_reason == (VMX_EXIT_REASONS_FAILED_VMENTRY |
-	     EXIT_REASON_MSR_LOAD_FAIL))) {
-		exit_qual = (to_vmx(vcpu))->nested.invalid_msr_load_exit_qual;
+	if (exit_reason == (VMX_EXIT_REASONS_FAILED_VMENTRY |
+	    EXIT_REASON_MSR_LOAD_FAIL)) {
+
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		struct vmx_msr_entry *addr;
+
+		if (nested_vmx_rollback_msr(vcpu,
+		    vmcs12->vm_entry_msr_load_count)) {
+			nested_vmx_abort(vcpu,
+				VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+
+			kfree(to_vmx(vcpu)->nested.vm_entry_msr_load_backup);
+		}
+
+		addr = __va(vmcs_read64(VM_ENTRY_MSR_LOAD_ADDR));
+		if (addr && addr->index == MSR_FS_BASE)
+			exit_qual =
+			   (to_vmx(vcpu))->nested.invalid_msr_load_exit_qual;
 	}
 
 	nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index ee7f40abd199..9a7c118036be 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -189,6 +189,14 @@ struct nested_vmx {
 	 * due to invalid VM-entry MSR-load area in vmcs12.
 	 */
 	u32 invalid_msr_load_exit_qual;
+
+	/*
+	 * This is used for backing up the MSRs of nested guests when
+	 * those MSRs are loaded from the VM-entry MSR-load area on
+	 * VM-entry. If VM-entry fails due to VM-entry MSR-loading, we
+	 * roll back the MSRs to the values saved here.
+	 */
+	struct msr_data *vm_entry_msr_load_backup;
 };
 
 struct vcpu_vmx {
-- 
2.20.1
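
For reviewers: below is a minimal standalone sketch of the
save-then-rollback pattern that nested_vmx_load_msr() and
nested_vmx_rollback_msr() implement above. The msr_entry type and the
get_msr()/set_msr() stubs are hypothetical stand-ins for
kvm_get_msr()/kvm_set_msr(), not KVM APIs; only the control flow
mirrors the patch.

#include <stdint.h>

struct msr_entry {
	uint32_t index;
	uint64_t data;
};

/* Hypothetical stand-ins for kvm_get_msr()/kvm_set_msr(). */
int get_msr(uint32_t index, uint64_t *data);
int set_msr(uint32_t index, uint64_t data);

/* Load 'count' MSRs, saving each old value into 'backup' first. */
uint32_t load_msrs(const struct msr_entry *req, uint32_t count,
		   struct msr_entry *backup)
{
	uint32_t i;

	for (i = 0; i < count; i++) {
		/* Save the current value before overwriting it. */
		backup[i].index = req[i].index;
		if (get_msr(req[i].index, &backup[i].data))
			return i + 1;	/* 1-based index of failing entry */
		if (set_msr(req[i].index, req[i].data))
			return i + 1;
	}
	return 0;	/* all entries loaded */
}

/* Undo a partial load: restore the first 'done' saved values. */
int rollback_msrs(const struct msr_entry *backup, uint32_t done)
{
	uint32_t i;

	for (i = 0; i < done; i++)
		if (set_msr(backup[i].index, backup[i].data))
			return -1;	/* rollback itself failed */
	return 0;
}

If the rollback itself fails there is no consistent guest state left to
restore, which is why nested_vmx_reflect_vmexit() escalates to
nested_vmx_abort(VMX_ABORT_SAVE_GUEST_MSR_FAIL) in that case.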