[Ccing a few others who might be interested]

Hi Eugene,

Did you look at the other patch that was posted for this functionality
by Wincy Van? https://lkml.org/lkml/2014/11/21/749

Eugene Korenevsky <ekorenevsky@xxxxxxxxx> writes:

> BitVisor hypervisor fails to start a nested guest due to lack of MSR
> load/store support in KVM.
>
> This patch fixes this problem according to Intel SDM.

It would be helpful for the commit message to be a little more
descriptive.

>
> Signed-off-by: Eugene Korenevsky <ekorenevsky@xxxxxxxxx>
> ---
>  arch/x86/include/asm/vmx.h            |   6 ++
>  arch/x86/include/uapi/asm/msr-index.h |   3 +
>  arch/x86/include/uapi/asm/vmx.h       |   1 +
>  arch/x86/kvm/vmx.c                    | 191 +++++++++++++++++++++++++++++++++-
>  virt/kvm/kvm_main.c                   |   1 +
>  5 files changed, 197 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index bcbfade..e07414c 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -454,6 +454,12 @@ struct vmx_msr_entry {
>  #define ENTRY_FAIL_VMCS_LINK_PTR	4
>
>  /*
> + * VMX Abort codes
> + */
> +#define VMX_ABORT_MSR_STORE	1
> +#define VMX_ABORT_MSR_LOAD	4
> +
> +/*
>   * VM-instruction error numbers
>   */
>  enum vm_instruction_error_number {
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index e21331c..3c9c601 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -316,6 +316,9 @@
>  #define MSR_IA32_UCODE_WRITE		0x00000079
>  #define MSR_IA32_UCODE_REV		0x0000008b
>
> +#define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
> +#define MSR_IA32_SMBASE			0x0000009e
> +

Extra tab here?

>  #define MSR_IA32_PERF_STATUS		0x00000198
>  #define MSR_IA32_PERF_CTL		0x00000199
>  #define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index 990a2fe..f5f8a62 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -56,6 +56,7 @@
>  #define EXIT_REASON_MSR_READ            31
>  #define EXIT_REASON_MSR_WRITE           32
>  #define EXIT_REASON_INVALID_STATE       33
> +#define EXIT_REASON_MSR_LOAD_FAILURE    34
>  #define EXIT_REASON_MWAIT_INSTRUCTION   36
>  #define EXIT_REASON_MONITOR_INSTRUCTION 39
>  #define EXIT_REASON_PAUSE_INSTRUCTION   40
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6a951d8..deebc96 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8498,6 +8498,163 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
>  }
>
> +struct msr_entry {
> +	u32 index;
> +	u32 reserved;
> +	u64 data;
> +} __packed;
> +

We already have struct vmx_msr_entry in vmx.h.

> +static bool vmx_msr_switch_area_verify(u32 count, u64 addr, int maxphyaddr)
> +{
> +	if (count == 0)
> +		return true;
> +	if ((addr & 0xf) != 0)
> +		return false;
> +	if ((addr >> maxphyaddr) != 0)
> +		return false;
> +	if (((addr + count * sizeof(struct msr_entry) - 1) >> maxphyaddr) != 0)
> +		return false;
> +	return true;
> +}
> +

Shouldn't we also check whether any of bits 63:32 are set?
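Something along these lines is what I have in mind (untested sketch; I'm
assuming the extra check only matters when the guest does not report
Intel 64 support, and that a long-mode CPUID helper such as
guest_cpuid_has_longmode() is usable from here -- adjust to whatever
helper we actually have):

	/* Bits 63:32 must be zero if the guest has no Intel 64 support */
	if (!guest_cpuid_has_longmode(vcpu) && (addr >> 32) != 0)
		return false;

That would also mean passing the vcpu down to
vmx_msr_switch_area_verify().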
> +static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
> +					 struct vmcs12 *vmcs12)
> +{
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> +#define VMCS12_MSR_SWITCH_VERIFY(name) {				\
> +	if (!vmx_msr_switch_area_verify(vmcs12->name##_count,		\
> +					vmcs12->name##_addr, maxphyaddr)) { \
> +		pr_warn_ratelimited(					\
> +			"%s: invalid MSR_{LOAD,STORE} parameters",	\
> +			#name);						\
> +		return false;						\
> +	}								\
> +	}

Just a personal opinion: replacing the macro with a switch statement is
probably a little less clumsy.

> +	VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_load);
> +	VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_store);
> +	VMCS12_MSR_SWITCH_VERIFY(vm_entry_msr_load);
> +	return true;
> +}
> +
> +static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
> +				      struct msr_entry *e)
> +{
> +	if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
> +		return false;
> +	/* SMM is not supported */
> +	if (e->index == MSR_IA32_SMM_MONITOR_CTL)
> +		return false;
> +	/* x2APIC MSR accesses are not allowed */
> +	if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> +		return false;
> +	if (e->reserved != 0)
> +		return false;
> +	return true;
> +}
> +
> +static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
> +{
> +	/* Intel SDM: a processor MAY prevent writing to these MSRs when
> +	 * loading guest states on VM entries or saving guest states on VM
> +	 * exits.
> +	 * Assume our emulated processor DOES prevent writing */
> +	return msr_index == MSR_IA32_UCODE_WRITE ||
> +	       msr_index == MSR_IA32_UCODE_REV;
> +}
> +
> +static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
> +					 u32 count, u64 addr)
> +{
> +	unsigned int i;
> +	struct msr_entry msr_entry;
> +	struct msr_data msr;
> +
> +	for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> +		if (kvm_read_guest(vcpu->kvm, addr,
> +				   &msr_entry, sizeof(msr_entry))) {
> +			pr_warn_ratelimited(
> +				"Load MSR %d: cannot read GPA: 0x%llx\n",
> +				i, addr);
> +			return i;
> +		}
> +		if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
> +			pr_warn_ratelimited(
> +				"Load MSR %d: invalid MSR entry 0x%x 0x%x\n",
> +				i, msr_entry.index, msr_entry.reserved);
> +			return i;
> +		}
> +		msr.host_initiated = 0;
> +		msr.index = msr_entry.index;
> +		msr.data = msr_entry.data;
> +		if (vmx_msr_switch_is_protected_msr(msr.index)) {
> +			pr_warn_ratelimited(
> +				"Load MSR %d: prevent writing to MSR 0x%x\n",
> +				i, msr.index);
> +			return i;
> +		}
> +		if (kvm_set_msr(vcpu, &msr)) {
> +			pr_warn_ratelimited(
> +				"Load MSR %d: cannot write 0x%llx to MSR 0x%x\n",
> +				i, msr.data, msr.index);
> +			return i;
> +		}
> +	}
> +	return 0;
> +}
> +

To ease debugging, these warning messages should also specify that they
are nested VMX specific.

Also, I think the interfaces/functions you have introduced here could be
split out into an introductory patch; a second patch could then use
these interfaces to provide the load/store functionality.
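On the warning messages, something as simple as this would do (just a
sketch, the exact prefix is up to you):

	pr_warn_ratelimited("nVMX: load MSR %u: cannot read GPA 0x%llx\n",
			    i, addr);

or keep the __func__ prefix that the old "VMCS MSR_{LOAD,STORE}
unsupported" message you are removing below already used.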
> +static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
> +				       struct msr_entry *e)
> +{
> +	/* x2APIC MSR accesses are not allowed */
> +	if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> +		return false;
> +	/* SMM is not supported */
> +	if (e->index == MSR_IA32_SMBASE)
> +		return false;
> +	if (e->reserved != 0)
> +		return false;
> +	return true;
> +}
> +
> +static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
> +					  u32 count, u64 addr)
> +{
> +	unsigned int i;
> +	struct msr_entry msr_entry;
> +
> +	for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> +		if (kvm_read_guest(vcpu->kvm, addr,
> +				   &msr_entry, 2 * sizeof(u32))) {
> +			pr_warn_ratelimited(
> +				"Store MSR %d: cannot read GPA: 0x%llx\n",
> +				i, addr);
> +			return i;
> +		}
> +		if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
> +			pr_warn_ratelimited(
> +				"Store MSR %d: invalid MSR entry 0x%x 0x%x\n",
> +				i, msr_entry.index, msr_entry.reserved);
> +			return i;
> +		}
> +		if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.data)) {
> +			pr_warn_ratelimited(
> +				"Store MSR %d: cannot read MSR 0x%x\n",
> +				i, msr_entry.index);
> +			return i;
> +		}
> +		if (kvm_write_guest(vcpu->kvm,
> +				    addr + offsetof(struct msr_entry, data),
> +				    &msr_entry.data, sizeof(msr_entry.data))) {
> +			pr_warn_ratelimited(
> +				"Store MSR %d: cannot write to GPA: 0x%llx\n",
> +				i, addr);
> +			return i;
> +		}
> +	}
> +	return 0;
> +}
> +
>  /*
>   * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
>   * for running an L2 nested guest.
> @@ -8507,6 +8664,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  	struct vmcs12 *vmcs12;
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	int cpu;
> +	unsigned int msr;
>  	struct loaded_vmcs *vmcs02;
>  	bool ia32e;
>
> @@ -8556,11 +8714,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>  		return 1;
>  	}
>
> -	if (vmcs12->vm_entry_msr_load_count > 0 ||
> -	    vmcs12->vm_exit_msr_load_count > 0 ||
> -	    vmcs12->vm_exit_msr_store_count > 0) {
> -		pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
> -				    __func__);
> +	if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
>  		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>  		return 1;
>  	}
> @@ -8670,6 +8824,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>
>  	prepare_vmcs02(vcpu, vmcs12);
>
> +	msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
> +				   vmcs12->vm_entry_msr_load_addr);
> +	if (msr)
> +		nested_vmx_entry_failure(vcpu, vmcs12,
> +			EXIT_REASON_MSR_LOAD_FAILURE, msr);
> +
>  	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
>  		return kvm_emulate_halt(vcpu);
>
> @@ -9099,6 +9259,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
>  		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
>  }
>
> +static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
> +{
> +	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> +	vmcs12->abort = abort_code;
> +	pr_warn("Nested VMX abort %u\n", abort_code);
> +	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> +}
> +
>  /*
>   * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
>   * and modify vmcs12 to make it see what it would expect to see there if
> @@ -9118,8 +9287,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
>  	prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
>  		       exit_qualification);
>
> +	if (exit_reason != EXIT_REASON_INVALID_STATE &&
> +	    exit_reason != EXIT_REASON_MSR_LOAD_FAILURE) {
> +		if (nested_vmx_store_msrs(vcpu,
> +					  vmcs12->vm_exit_msr_store_count,
> +					  vmcs12->vm_exit_msr_store_addr))
> +			nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE);
> +	}
> +
>  	vmx_load_vmcs01(vcpu);
>
> +	if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
> +				 vmcs12->vm_exit_msr_load_addr))
> +		nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD);
> +
>  	if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
>  	    && nested_exit_intr_ack_set(vcpu)) {
>  		int irq = kvm_cpu_get_interrupt(vcpu);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 5b45330..c9d1c4a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1582,6 +1582,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
>  	}
>  	return 0;
>  }
> +EXPORT_SYMBOL_GPL(kvm_write_guest);
>
>  int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
>  			      gpa_t gpa, unsigned long len)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html