Hi David, David Hildenbrand <david@xxxxxxxxxx> writes: >> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) >> +{ >> + return nested_cpu_has_vmfunc(vmcs12) && >> + (vmcs12->vm_function_control & >> + VMX_VMFUNC_EPTP_SWITCHING); >> +} >> + >> static inline bool is_nmi(u32 intr_info) >> { >> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) >> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) >> if (cpu_has_vmx_vmfunc()) { >> vmx->nested.nested_vmx_secondary_ctls_high |= >> SECONDARY_EXEC_ENABLE_VMFUNC; >> - vmx->nested.nested_vmx_vmfunc_controls = 0; >> + /* >> + * Advertise EPTP switching unconditionally >> + * since we emulate it >> + */ >> + vmx->nested.nested_vmx_vmfunc_controls = >> + VMX_VMFUNC_EPTP_SWITCHING; > > Should this only be advertised, if enable_ept is set (if the guest also > sees/can use SECONDARY_EXEC_ENABLE_EPT)? This represents the function control MSR, which on the hardware is a RO value. The checks for enable_ept and such are somewhere else. >> } >> >> /* >> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) >> return 1; >> } >> >> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address) > > check_..._valid -> valid_ept_address() ? I think either of the names is fine and I would prefer not to respin unless you feel really strongly about it :) > >> +{ >> + struct vcpu_vmx *vmx = to_vmx(vcpu); >> + u64 mask = VMX_EPT_RWX_MASK; >> + int maxphyaddr = cpuid_maxphyaddr(vcpu); >> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; >> + >> + /* Check for execute_only validity */ >> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) { >> + if (!(vmx->nested.nested_vmx_ept_caps & >> + VMX_EPT_EXECUTE_ONLY_BIT)) >> + return false; >> + } >> + >> + /* Bits 5:3 must be 3 */ >> + if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW) >> + return false; >> + >> + /* Reserved bits should not be set */ >> + if (address >> maxphyaddr || ((address >> 7) & 0x1f)) >> + return false; >> + >> + /* AD, if set, should be supported */ >> + if ((address & VMX_EPT_AD_ENABLE_BIT)) { >> + if (!enable_ept_ad_bits) >> + return false; >> + mmu->ept_ad = true; >> + } else >> + mmu->ept_ad = false; > > I wouldn't expect a "check" function to modify the mmu. Can you move > modifying the mmu outside of this function (leaving the > enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad > _after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?) > Well, the correct thing to do is have a wrapper around it in mmu.c without directly calling here and also call this function before nested_mmu is initialized. I am working on a separate patch for this btw. It seems to me setting mmu->ept_ad after kvm_mmu_unload is unnecessary since it's already being set only if everything else succeeds. kvm_mmu_unload() isn't affected by the setting of this flag if I understand correctly. >> + >> + return true; >> +} >> + >> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, >> + struct vmcs12 *vmcs12) >> +{ >> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; >> + u64 *l1_eptp_list, address; >> + struct page *page; >> + >> + if (!nested_cpu_has_eptp_switching(vmcs12) || >> + !nested_cpu_has_ept(vmcs12)) >> + return 1; >> + >> + if (index >= VMFUNC_EPTP_ENTRIES) >> + return 1; >> + >> + page = nested_get_page(vcpu, vmcs12->eptp_list_address); >> + if (!page) >> + return 1; >> + >> + l1_eptp_list = kmap(page); >> + address = l1_eptp_list[index]; >> + >> + /* >> + * If the (L2) guest does a vmfunc to the currently >> + * active ept pointer, we don't have to do anything else >> + */ >> + if (vmcs12->ept_pointer != address) { >> + if (!check_ept_address_valid(vcpu, address)) { >> + kunmap(page); >> + nested_release_page_clean(page); >> + return 1; >> + } >> + kvm_mmu_unload(vcpu); >> + vmcs12->ept_pointer = address; >> + /* >> + * TODO: Check what's the correct approach in case >> + * mmu reload fails. Currently, we just let the next >> + * reload potentially fail >> + */ >> + kvm_mmu_reload(vcpu); > > So, what actually happens if this generates a tripple fault? I guess we > will kill the (nested) hypervisor? Yes. Not sure what's the right thing to do is though... Bandan >> + } >> + >> + kunmap(page); >> + nested_release_page_clean(page); >> + return 0; >> +} >> + >> static int handle_vmfunc(struct kvm_vcpu *vcpu) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> @@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) >> vmcs12 = get_vmcs12(vcpu); >> if ((vmcs12->vm_function_control & (1 << function)) == 0) >> goto fail; >> - WARN_ONCE(1, "VMCS12 VM function control should have been zero"); >> + >> + switch (function) { >> + case 0: >> + if (nested_vmx_eptp_switching(vcpu, vmcs12)) >> + goto fail; >> + break; >> + default: >> + goto fail; >> + } >> + return kvm_skip_emulated_instruction(vcpu); >> >> fail: >> nested_vmx_vmexit(vcpu, vmx->exit_reason, >> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> vmx->nested.nested_vmx_entry_ctls_high)) >> return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> >> - if (nested_cpu_has_vmfunc(vmcs12) && >> - (vmcs12->vm_function_control & >> - ~vmx->nested.nested_vmx_vmfunc_controls)) >> - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> + if (nested_cpu_has_vmfunc(vmcs12)) { >> + if (vmcs12->vm_function_control & >> + ~vmx->nested.nested_vmx_vmfunc_controls) >> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> + >> + if (nested_cpu_has_eptp_switching(vmcs12)) { >> + if (!nested_cpu_has_ept(vmcs12) || >> + (vmcs12->eptp_list_address >> >> + cpuid_maxphyaddr(vcpu)) || >> + !IS_ALIGNED(vmcs12->eptp_list_address, 4096)) >> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> + } >> + } >> + >> >> if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) >> return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >>