David Hildenbrand <david@xxxxxxxxxx> writes: > On 10.07.2017 22:49, Bandan Das wrote: >> When L2 uses vmfunc, L0 utilizes the associated vmexit to >> emulate a switching of the ept pointer by reloading the >> guest MMU. >> >> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >> Signed-off-by: Bandan Das <bsd@xxxxxxxxxx> >> --- >> arch/x86/include/asm/vmx.h | 6 +++++ >> arch/x86/kvm/vmx.c | 58 +++++++++++++++++++++++++++++++++++++++++++--- >> 2 files changed, 61 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h >> index da5375e..5f63a2e 100644 >> --- a/arch/x86/include/asm/vmx.h >> +++ b/arch/x86/include/asm/vmx.h >> @@ -115,6 +115,10 @@ >> #define VMX_MISC_SAVE_EFER_LMA 0x00000020 >> #define VMX_MISC_ACTIVITY_HLT 0x00000040 >> >> +/* VMFUNC functions */ >> +#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 >> +#define VMFUNC_EPTP_ENTRIES 512 >> + >> static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) >> { >> return vmx_basic & GENMASK_ULL(30, 0); >> @@ -200,6 +204,8 @@ enum vmcs_field { >> EOI_EXIT_BITMAP2_HIGH = 0x00002021, >> EOI_EXIT_BITMAP3 = 0x00002022, >> EOI_EXIT_BITMAP3_HIGH = 0x00002023, >> + EPTP_LIST_ADDRESS = 0x00002024, >> + EPTP_LIST_ADDRESS_HIGH = 0x00002025, >> VMREAD_BITMAP = 0x00002026, >> VMWRITE_BITMAP = 0x00002028, >> XSS_EXIT_BITMAP = 0x0000202C, >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index fe8f5fc..0a969fb 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -246,6 +246,7 @@ struct __packed vmcs12 { >> u64 eoi_exit_bitmap1; >> u64 eoi_exit_bitmap2; >> u64 eoi_exit_bitmap3; >> + u64 eptp_list_address; >> u64 xss_exit_bitmap; >> u64 guest_physical_address; >> u64 vmcs_link_pointer; >> @@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { >> FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), >> FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), >> FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), >> + FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), >> FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), >> FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), >> FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), >> @@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) >> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); >> } >> >> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) >> +{ >> + return nested_cpu_has_vmfunc(vmcs12) && >> + (vmcs12->vm_function_control & > > I wonder if it makes sense to rename vm_function_control to > - vmfunc_control > - vmfunc_controls (so it matches nested_vmx_vmfunc_controls) > - vmfunc_ctrl I tend to follow the SDM names because it's easy to look for them. >> + VMX_VMFUNC_EPTP_SWITCHING); >> +} >> + >> static inline bool is_nmi(u32 intr_info) >> { >> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) >> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) >> if (cpu_has_vmx_vmfunc()) { >> vmx->nested.nested_vmx_secondary_ctls_high |= >> SECONDARY_EXEC_ENABLE_VMFUNC; >> - vmx->nested.nested_vmx_vmfunc_controls = 0; >> + /* >> + * Advertise EPTP switching unconditionally >> + * since we emulate it >> + */ >> + vmx->nested.nested_vmx_vmfunc_controls = >> + VMX_VMFUNC_EPTP_SWITCHING;> } >> >> /* >> @@ -7772,6 +7786,9 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> struct vmcs12 *vmcs12; >> u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; >> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; >> + struct page *page = NULL; >> + u64 *l1_eptp_list, address; >> >> /* >> * VMFUNC is only supported for nested guests, but we always enable the >> @@ -7784,11 +7801,46 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) >> } >> >> vmcs12 = get_vmcs12(vcpu); >> - if ((vmcs12->vm_function_control & (1 << function)) == 0) >> + if (((vmcs12->vm_function_control & (1 << function)) == 0) || >> + WARN_ON_ONCE(function)) > > "... instruction causes a VM exit if the bit at position EAX is 0 in the > VM-function controls (the selected VM function is > not enabled)." > > So g2 can trigger this WARN_ON_ONCE, no? I think we should drop it then > completely. It's a good hint to see if L2 misbehaved and WARN_ON_ONCE makes sure it's not misused. >> + goto fail; >> + >> + if (!nested_cpu_has_ept(vmcs12) || >> + !nested_cpu_has_eptp_switching(vmcs12)) >> + goto fail; >> + >> + if (!vmcs12->eptp_list_address || index >= VMFUNC_EPTP_ENTRIES) >> + goto fail; > > I can find the definition for an vmexit in case of index >= > VMFUNC_EPTP_ENTRIES, but not for !vmcs12->eptp_list_address in the SDM. > > Can you give me a hint? I don't think there is. Since, we are basically emulating eptp switching for L2, this is a good check to have. >> + >> + page = nested_get_page(vcpu, vmcs12->eptp_list_address); >> + if (!page) >> goto fail; >> - WARN_ONCE(1, "VMCS12 VM function control should have been zero"); >> + >> + l1_eptp_list = kmap(page); >> + address = l1_eptp_list[index]; >> + if (!address) >> + goto fail; > > Can you move that check to the other address checks below? (or rework if > this make sense, see below) > >> + /* >> + * If the (L2) guest does a vmfunc to the currently >> + * active ept pointer, we don't have to do anything else >> + */ >> + if (vmcs12->ept_pointer != address) { >> + if (address >> cpuid_maxphyaddr(vcpu) || >> + !IS_ALIGNED(address, 4096)) > > Couldn't the pfn still be invalid and make kvm_mmu_reload() fail? > (triggering a KVM_REQ_TRIPLE_FAULT) If there's a triple fault, I think it's a good idea to inject it back. Basically, there's no need to take care of damage control that L1 is intentionally doing. >> + goto fail; >> + kvm_mmu_unload(vcpu); >> + vmcs12->ept_pointer = address; >> + kvm_mmu_reload(vcpu); > > I was thinking about something like this: > > kvm_mmu_unload(vcpu); > old = vmcs12->ept_pointer; > vmcs12->ept_pointer = address; > if (kvm_mmu_reload(vcpu)) { > /* pointer invalid, restore previous state */ > kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); > vmcs12->ept_pointer = old; > kvm_mmu_reload(vcpu); > goto fail; > } > > The you can inherit the checks from mmu_check_root(). > > > Wonder why I can't spot checks for cpuid_maxphyaddr() / > IS_ALIGNED(address, 4096) for ordinary use of vmcs12->ept_pointer. The > checks should be identical. I think the reason is vmcs12->ept_pointer is never used directly. It's used to create a shadow table but nevertheless, the check should be there. > >> + kunmap(page); >> + nested_release_page_clean(page); > > shouldn't the kunmap + nested_release_page_clean go outside the if clause? :) Indeed, thanks for the catch. Bandan >> + } >> + return kvm_skip_emulated_instruction(vcpu); >> >> fail: >> + if (page) { >> + kunmap(page); >> + nested_release_page_clean(page); >> + } >> nested_vmx_vmexit(vcpu, vmx->exit_reason, >> vmcs_read32(VM_EXIT_INTR_INFO), >> vmcs_readl(EXIT_QUALIFICATION)); >> > > David and mmu code are not yet best friends. So sorry if I am missing > something.