If we let L1 use EPT, we should probably also support the INVEPT
instruction, so that L1 can explicitly invalidate EPT translations for
its guests.

This patch advertises INVEPT and its invalidation types in the EPT
capabilities we expose to L1, adds a handler for EXIT_REASON_INVEPT that
emulates the instruction by flushing the relevant shadow EPT (EPT02)
mappings, and remembers the last EPT02 used to run L2 so that a
single-context or individual-address INVEPT of that EPTP can avoid a
global flush.

Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
---
 arch/x86/include/asm/vmx.h |    2 
 arch/x86/kvm/vmx.c         |  112 +++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

--- .before/arch/x86/include/asm/vmx.h	2011-11-10 11:33:59.000000000 +0200
+++ .after/arch/x86/include/asm/vmx.h	2011-11-10 11:33:59.000000000 +0200
@@ -279,6 +279,7 @@ enum vmcs_field {
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_INVEPT              50
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 
@@ -404,6 +405,7 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT				(1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
 #define VMX_EPT_1GB_PAGE_BIT			(1ull << 17)
+#define VMX_EPT_INVEPT_BIT			(1ull << 20)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
--- .before/arch/x86/kvm/vmx.c	2011-11-10 11:33:59.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c	2011-11-10 11:33:59.000000000 +0200
@@ -351,6 +351,8 @@ struct nested_vmx {
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
 	u64 vmcs01_tsc_offset;
+	/* Remember last EPT02, for single-context INVEPT optimization */
+	u64 last_eptp02;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
 	/*
@@ -1987,6 +1989,10 @@ static __init void nested_vmx_setup_ctls
 	/* ept capabilities */
 	if (nested_ept) {
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT;
+		nested_vmx_ept_caps |=
+			VMX_EPT_INVEPT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT |
+			VMX_EPT_EXTENT_CONTEXT_BIT |
+			VMX_EPT_EXTENT_INDIVIDUAL_BIT;
 		nested_vmx_ept_caps &= vmx_capability.ept;
 	} else
 		nested_vmx_ept_caps = 0;
@@ -5568,6 +5574,105 @@ static int handle_vmptrst(struct kvm_vcp
 	return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+	u32 vmx_instruction_info;
+	unsigned long type;
+	gva_t gva;
+	struct x86_exception e;
+	struct {
+		u64 eptp, gpa;
+	} operand;
+
+
+	if (!nested_ept || !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	/* According to the Intel VMX instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global)
+	 */
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+				sizeof(operand), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	switch (type) {
+	case VMX_EPT_EXTENT_GLOBAL:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			ept_sync_global();
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_CONTEXT:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/*
+			 * We efficiently handle the common case of L1
+			 * invalidating the last eptp it used to run L2.
+			 * TODO: Instead of saving one last_eptp02, look up
+			 * operand.eptp in the shadow EPT table cache, to
+			 * find its shadow. Then last_eptp02 won't be needed.
+			 */
+			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+			struct vcpu_vmx *vmx = to_vmx(vcpu);
+			if (vmcs12 && nested_cpu_has_ept(vmcs12) &&
+			    (vmcs12->ept_pointer == operand.eptp) &&
+			    vmx->nested.last_eptp02)
+				ept_sync_context(vmx->nested.last_eptp02);
+			else
+				ept_sync_global();
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+			struct vcpu_vmx *vmx = to_vmx(vcpu);
+			if (vmcs12 && nested_cpu_has_ept(vmcs12) &&
+			    (vmcs12->ept_pointer == operand.eptp) &&
+			    vmx->nested.last_eptp02)
+				ept_sync_individual_addr(
+					vmx->nested.last_eptp02, operand.gpa);
+			else
+				ept_sync_global();
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	default:
+		nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+	}
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -5609,6 +5714,7 @@ static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+	[EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -5793,6 +5899,7 @@ static bool nested_vmx_exit_handled(stru
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+	case EXIT_REASON_INVEPT:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7056,6 +7163,11 @@ void prepare_vmcs12(struct kvm_vcpu *vcp
 	/* clear vm-entry fields which are to be cleared on exit */
 	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
 		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+	/* For single-context INVEPT optimization */
+	if (nested_cpu_has_ept(vmcs12))
+		to_vmx(vcpu)->nested.last_eptp02 = vmcs_read64(EPT_POINTER);
+
 }
 
 /*
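
Not for application, only for context: below is a rough sketch of the
kind of INVEPT wrapper an L1 hypervisor could use to exercise the new
handle_invept() path. The descriptor layout and the single-context (1)
and all-context (2) invalidation types come from the SDM; the helper
name, the constants, and the standalone style are made up for
illustration and are not KVM code.

#include <stdint.h>

/* INVEPT invalidation types per the SDM (names are illustrative only) */
#define INVEPT_SINGLE_CONTEXT	1UL
#define INVEPT_ALL_CONTEXT	2UL

/* 128-bit INVEPT descriptor: the EPT pointer plus a reserved field */
struct invept_desc {
	uint64_t eptp;
	uint64_t reserved;	/* must be zero */
};

/*
 * Issue INVEPT with the given type and EPTP.  Returns 0 on success and
 * -1 on VMfail (CF or ZF set).  Must run at CPL 0 in VMX root operation,
 * i.e. inside the L1 hypervisor itself.
 */
static inline int invept(unsigned long type, uint64_t eptp)
{
	struct invept_desc desc = { .eptp = eptp, .reserved = 0 };
	unsigned char fail;

	/* invept (%rax), %rcx -- raw encoding 66 0f 38 80 /r, modrm 0x08 */
	asm volatile (".byte 0x66, 0x0f, 0x38, 0x80, 0x08\n\t"
		      "setbe %0"	/* CF=1 or ZF=1 means failure */
		      : "=qm" (fail)
		      : "a" (&desc), "c" (type)
		      : "cc", "memory");
	return fail ? -1 : 0;
}

With such a wrapper, an invept(INVEPT_SINGLE_CONTEXT, eptp) for the EPTP
that L1 last used to run an L2 hits the fast path added above
(ept_sync_context() on last_eptp02); any other EPTP, or a global
invalidation, falls back to ept_sync_global().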