Eugene Korenevsky <ekorenevsky@xxxxxxxxx> writes:

> Several hypervisors use MSR loading/storing to run guests.
> This patch implements emulation of this feature and allows these
> hypervisors to work in L1.
>
> The following is emulated:
> - Loading MSRs on VM-entries
> - Saving MSRs on VM-exits
> - Loading MSRs on VM-exits
>
> Actions taken on loading MSRs:
> - MSR load area is verified
> - For each MSR entry from load area:
>   - MSR load entry is read and verified
>   - MSR value is safely written
> Actions taken on storing MSRs:
> - MSR store area is verified
> - For each MSR entry from store area:
>   - MSR entry is read and verified
>   - MSR value is safely read using MSR index from MSR entry
>   - MSR value is written to MSR entry
>
> The code performs checks required by Intel Software Developer Manual.
>

Thank you for the commit message.

> This patch is partially based on Wincy Wan's work.

Typo in the name above, I believe it should be "Wincy Van".

I have added Jan and Wincy to the CC list since they reviewed your earlier
proposal. I think it would be better to split this up as I mentioned earlier;
however, if the other reviewers and the maintainer don't have objections,
I am ok :)

>
> Signed-off-by: Eugene Korenevsky <ekorenevsky@xxxxxxxxx>
> ---
>  arch/x86/include/asm/vmx.h            |   6 +
>  arch/x86/include/uapi/asm/msr-index.h |   3 +
>  arch/x86/include/uapi/asm/vmx.h       |   2 +
>  arch/x86/kvm/vmx.c                    | 210 ++++++++++++++++++++++++++++++++--
>  virt/kvm/kvm_main.c                   |   1 +
>  5 files changed, 215 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 45afaee..8bdb247 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -457,6 +457,12 @@ struct vmx_msr_entry {
>  #define ENTRY_FAIL_VMCS_LINK_PTR        4
>
>  /*
> + * VMX Abort codes
> + */
> +#define VMX_ABORT_MSR_STORE_FAILURE     1
> +#define VMX_ABORT_MSR_LOAD_FAILURE      4
> +
> +/*
>   * VM-instruction error numbers
>   */
>  enum vm_instruction_error_number {
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index e21331c..3c9c601 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -316,6 +316,9 @@
>  #define MSR_IA32_UCODE_WRITE            0x00000079
>  #define MSR_IA32_UCODE_REV              0x0000008b
>
> +#define MSR_IA32_SMM_MONITOR_CTL        0x0000009b
> +#define MSR_IA32_SMBASE                 0x0000009e
> +
>  #define MSR_IA32_PERF_STATUS            0x00000198
>  #define MSR_IA32_PERF_CTL               0x00000199
>  #define MSR_AMD_PSTATE_DEF_BASE         0xc0010064
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index b813bf9..52ad8e2 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -56,6 +56,7 @@
>  #define EXIT_REASON_MSR_READ            31
>  #define EXIT_REASON_MSR_WRITE           32
>  #define EXIT_REASON_INVALID_STATE       33
> +#define EXIT_REASON_MSR_LOAD_FAILURE    34
>  #define EXIT_REASON_MWAIT_INSTRUCTION   36
>  #define EXIT_REASON_MONITOR_INSTRUCTION 39
>  #define EXIT_REASON_PAUSE_INSTRUCTION   40
> @@ -116,6 +117,7 @@
>          { EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
>          { EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
>          { EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
> +        { EXIT_REASON_MSR_LOAD_FAILURE,      "MSR_LOAD_FAILURE" }, \
>          { EXIT_REASON_INVD,                  "INVD" }, \
>          { EXIT_REASON_INVVPID,               "INVVPID" }, \
>          { EXIT_REASON_INVPCID,               "INVPCID" }, \
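
One general note before the vmx.c changes: the load and store helpers below
all operate on guest memory laid out as struct vmx_msr_entry, and the
reserved-bits checks and the partial kvm_read_guest() in the store path only
make sense with that layout in mind. It might be worth a short comment above
the new helpers spelling it out. For reference, this is the definition that
already lives in asm/vmx.h (quoting from memory, please double-check the
alignment attribute):

/*
 * Layout of one entry in the VM-entry/VM-exit MSR load and store areas.
 */
struct vmx_msr_entry {
        u32 index;      /* MSR index to load or store */
        u32 reserved;   /* must be zero; rejected by the *_verify() helpers */
        u64 value;      /* MSR value; the only field written back on store */
} __aligned(16);
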
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9bcc871..86dc7db 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8571,6 +8571,168 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>          kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
>  }
>
> +static bool vmx_msr_switch_area_verify(struct kvm_vcpu *vcpu,
> +                                       unsigned long count_field,
> +                                       unsigned long addr_field,
> +                                       int maxphyaddr)
> +{
> +        u64 count, addr;
> +
> +        BUG_ON(vmcs12_read_any(vcpu, count_field, &count));
> +        BUG_ON(vmcs12_read_any(vcpu, addr_field, &addr));

BUG_ON() is a bit harsh here; please use WARN_ON() and bail out instead
(see the sketch after this hunk for what I have in mind).

> +        if (!IS_ALIGNED(addr, 16))
> +                goto fail;
> +        if (addr >> maxphyaddr)
> +                goto fail;
> +        if ((addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
> +                goto fail;
> +        return true;
> +fail:
> +        pr_warn_ratelimited(
> +                "nVMX: invalid MSR switch (0x%lx, 0x%lx, %d, %llu, 0x%08llx)",
> +                count_field, addr_field, maxphyaddr, count, addr);
> +        return false;
> +}
> +
> +static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
> +                                         struct vmcs12 *vmcs12)
> +{
> +        int maxphyaddr;
> +
> +        if (vmcs12->vm_exit_msr_load_count == 0 &&
> +            vmcs12->vm_exit_msr_store_count == 0 &&
> +            vmcs12->vm_entry_msr_load_count == 0)
> +                return true; /* Fast path */
> +        maxphyaddr = cpuid_maxphyaddr(vcpu);
> +        return vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_LOAD_COUNT,
> +                                          VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) &&
> +               vmx_msr_switch_area_verify(vcpu, VM_EXIT_MSR_STORE_COUNT,
> +                                          VM_EXIT_MSR_STORE_ADDR,
> +                                          maxphyaddr) &&
> +               vmx_msr_switch_area_verify(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
> +                                          VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr);
> +}
> +
> +static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
> +                                      struct vmx_msr_entry *e)
> +{
> +        if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
> +                return false;
> +        /* SMM is not supported */
> +        if (e->index == MSR_IA32_SMM_MONITOR_CTL)
> +                return false;
> +        /* x2APIC MSR accesses are not allowed */
> +        if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> +                return false;
> +        if (e->reserved != 0)
> +                return false;
> +        return true;
> +}
> +
> +static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
> +{
> +        /* Intel SDM: a processor MAY prevent writing to these MSRs when
> +         * loading guest states on VM entries or saving guest states on VM
> +         * exits.
> +         * Assume our emulated processor DOES prevent writing */

Just a nit: please use the usual kernel comment style here (open with a
bare "/*" and put the closing "*/" on its own line) to be consistent.

> +        return msr_index == MSR_IA32_UCODE_WRITE ||
> +               msr_index == MSR_IA32_UCODE_REV;
> +}
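
To expand on the BUG_ON() comment above: both fields are compile-time
constants, so a vmcs12_read_any() failure there can only mean a KVM bug,
not a misbehaving L1, and it should not be able to take the whole host
down. Untested sketch of what I have in mind:

static bool vmx_msr_switch_area_verify(struct kvm_vcpu *vcpu,
                                       unsigned long count_field,
                                       unsigned long addr_field,
                                       int maxphyaddr)
{
        u64 count, addr;

        /* Reading a known vmcs12 field must not fail; warn and bail out. */
        if (WARN_ON(vmcs12_read_any(vcpu, count_field, &count)) ||
            WARN_ON(vmcs12_read_any(vcpu, addr_field, &addr)))
                return false;
        if (!IS_ALIGNED(addr, 16))
                goto fail;
        if (addr >> maxphyaddr)
                goto fail;
        if ((addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
                goto fail;
        return true;
fail:
        pr_warn_ratelimited(
                "nVMX: invalid MSR switch (0x%lx, 0x%lx, %d, %llu, 0x%08llx)",
                count_field, addr_field, maxphyaddr, count, addr);
        return false;
}
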
> +
> +static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
> +                                         u32 count, u64 addr)
> +{
> +        unsigned int i;
> +        struct vmx_msr_entry msr_entry;
> +        struct msr_data msr;
> +
> +        for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> +                if (kvm_read_guest(vcpu->kvm, addr,
> +                                   &msr_entry, sizeof(msr_entry))) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: cannot read GPA: 0x%llx\n",
> +                                __func__, i, addr);
> +                        return i;
> +                }
> +                if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
> +                                __func__, i, msr_entry.index,
> +                                msr_entry.reserved);
> +                        return i;
> +                }
> +                msr.host_initiated = false;
> +                msr.index = msr_entry.index;
> +                msr.data = msr_entry.value;
> +                if (vmx_msr_switch_is_protected_msr(msr.index)) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: prevent writing to MSR 0x%x\n",
> +                                __func__, i, msr.index);
> +                        return i;
> +                }
> +                if (kvm_set_msr(vcpu, &msr)) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: cannot write 0x%llx to MSR 0x%x\n",
> +                                __func__, i, msr.data, msr.index);
> +                        return i;
> +                }
> +        }
> +        return 0;
> +}
> +
> +static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
> +                                       struct vmx_msr_entry *e)
> +{
> +        /* x2APIC MSR accesses are not allowed */
> +        if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> +                return false;
> +        /* SMM is not supported */
> +        if (e->index == MSR_IA32_SMBASE)
> +                return false;
> +        if (e->reserved != 0)
> +                return false;
> +        return true;
> +}
> +
> +static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
> +                                          u32 count, u64 addr)
> +{
> +        unsigned int i;
> +        struct vmx_msr_entry msr_entry;
> +
> +        for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> +                if (kvm_read_guest(vcpu->kvm, addr,
> +                                   &msr_entry, 2 * sizeof(u32))) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: cannot read GPA: 0x%llx\n",
> +                                __func__, i, addr);
> +                        return i;
> +                }
> +                if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: invalid MSR entry 0x%x 0x%x\n",
> +                                __func__, i, msr_entry.index,
> +                                msr_entry.reserved);
> +                        return i;
> +                }
> +                if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.value)) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: cannot read MSR 0x%x\n",
> +                                __func__, i, msr_entry.index);
> +                        return i;
> +                }
> +                if (kvm_write_guest(vcpu->kvm,
> +                                addr + offsetof(struct vmx_msr_entry, value),
> +                                &msr_entry.value, sizeof(msr_entry.value))) {
> +                        pr_warn_ratelimited(
> +                                "%s MSR %u: cannot write to GPA: 0x%llx\n",
> +                                __func__, i, addr);
> +                        return i;
> +                }
> +        }
> +        return 0;
> +}
> +
>  /*
>   * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
>   * for running an L2 nested guest.
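
Not a blocker: vmx_load_msr_entry_verify() and vmx_store_msr_entry_verify()
share the x2APIC and reserved-bits checks. If you respin anyway, a small
common helper would keep the two lists of special cases from drifting apart.
Rough idea, name made up, untested:

/* Checks common to VM-entry MSR loads and VM-exit MSR stores. */
static bool vmx_msr_entry_common_verify(struct kvm_vcpu *vcpu,
                                        struct vmx_msr_entry *e)
{
        /* x2APIC MSR accesses are not allowed */
        if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
                return false;
        return e->reserved == 0;
}

static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
                                      struct vmx_msr_entry *e)
{
        if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
                return false;
        /* SMM is not supported */
        if (e->index == MSR_IA32_SMM_MONITOR_CTL)
                return false;
        return vmx_msr_entry_common_verify(vcpu, e);
}

static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
                                       struct vmx_msr_entry *e)
{
        /* SMM is not supported */
        if (e->index == MSR_IA32_SMBASE)
                return false;
        return vmx_msr_entry_common_verify(vcpu, e);
}
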
> @@ -8580,6 +8742,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>          struct vmcs12 *vmcs12;
>          struct vcpu_vmx *vmx = to_vmx(vcpu);
>          int cpu;
> +        unsigned int msr;
>          struct loaded_vmcs *vmcs02;
>          bool ia32e;
>
> @@ -8629,11 +8792,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>                  return 1;
>          }
>
> -        if (vmcs12->vm_entry_msr_load_count > 0 ||
> -            vmcs12->vm_exit_msr_load_count > 0 ||
> -            vmcs12->vm_exit_msr_store_count > 0) {
> -                pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
> -                                    __func__);
> +        if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
>                  nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>                  return 1;
>          }
> @@ -8739,10 +8898,20 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>
>          vmx_segment_cache_clear(vmx);
>
> -        vmcs12->launch_state = 1;
> -
>          prepare_vmcs02(vcpu, vmcs12);
>
> +        msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
> +                                   vmcs12->vm_entry_msr_load_addr);
> +        if (msr) {
> +                leave_guest_mode(vcpu);
> +                vmx_load_vmcs01(vcpu);
> +                nested_vmx_entry_failure(vcpu, vmcs12,
> +                                         EXIT_REASON_MSR_LOAD_FAILURE, msr);
> +                return 1;
> +        }
> +
> +        vmcs12->launch_state = 1;
> +
>          if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
>                  return kvm_emulate_halt(vcpu);
>
> @@ -9040,6 +9209,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>          kvm_clear_interrupt_queue(vcpu);
>  }
>
> +static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
> +{
> +        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> +        vmcs12->abort = abort_code;
> +        pr_warn("Nested VMX abort %u\n", abort_code);
> +        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> +}
> +
>  /*
>   * A part of what we need to when the nested L2 guest exits and we want to
>   * run its L1 parent, is to reset L1's guest state to the host state specified
> @@ -9172,6 +9350,19 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
>
>          kvm_set_dr(vcpu, 7, 0x400);
>          vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> +
> +        if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
> +                                 vmcs12->vm_exit_msr_load_addr))
> +                nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD_FAILURE);
> +}
> +
> +static bool vmx_is_entry_failure(u32 exit_reason)
> +{
> +        if ((exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) == 0)
> +                return false;
> +        exit_reason &= ~VMX_EXIT_REASONS_FAILED_VMENTRY;
> +        return exit_reason == EXIT_REASON_INVALID_STATE ||
> +               exit_reason == EXIT_REASON_MSR_LOAD_FAILURE;
>  }
>
>  /*
> @@ -9193,6 +9384,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
>          prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
>                         exit_qualification);
>
> +        if (nested_vmx_store_msrs(vcpu,
> +                                  vmcs12->vm_exit_msr_store_count,
> +                                  vmcs12->vm_exit_msr_store_addr))
> +                nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE_FAILURE);
> +
>          vmx_load_vmcs01(vcpu);
>
>          if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index c5c186a..77179c5 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1579,6 +1579,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
>          }
>          return 0;
>  }
> +EXPORT_SYMBOL_GPL(kvm_write_guest);
>
>  int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
>                                gpa_t gpa, unsigned long len)
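
One last question: I do not see a caller of vmx_is_entry_failure() in these
hunks; am I missing it, or does it get used by a later patch? My guess is
that it is meant to skip the VM-exit MSR store when the exit is itself a
failed VM entry (if I remember the SDM correctly, no MSRs are saved to the
VM-exit MSR-store area in that case). If so, wiring it up in
nested_vmx_vmexit() could look roughly like this (untested, and the
placement is just a guess):

        /* Skip the VM-exit MSR store when the exit is a failed VM entry. */
        if (!vmx_is_entry_failure(exit_reason) &&
            nested_vmx_store_msrs(vcpu,
                                  vmcs12->vm_exit_msr_store_count,
                                  vmcs12->vm_exit_msr_store_addr))
                nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE_FAILURE);
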