On Sun, 2014-11-02 at 10:03 +0100, Paolo Bonzini wrote: > You can just use the same scheme as your patch 88/102: Why is that? Why should I not use the upstream version? Ben. > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 685b8448d6e2..bd8cc9055fe2 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -6740,6 +6740,12 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) > return 1; > } > > +static int handle_invept(struct kvm_vcpu *vcpu) > +{ > + kvm_queue_exception(vcpu, UD_VECTOR); > + return 1; > +} > + > /* > * The exit handlers return 1 if the exit was handled fully and guest execution > * may resume. Otherwise they set the kvm_run parameter to indicate what needs > @@ -6785,6 +6791,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { > [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, > [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, > [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, > + [EXIT_REASON_INVEPT] = handle_invept, > }; > > static const int kvm_vmx_max_exit_handlers = > @@ -7020,6 +7027,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) > case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: > case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: > case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: > + case EXIT_REASON_INVEPT: > /* > * VMX instructions trap unconditionally. This allows L1 to > * emulate them for its L2 guest, i.e., allows 3-level nesting! > > > Paolo > > On 01/11/2014 23:28, Ben Hutchings wrote: > > 3.2.64-rc1 review patch. If anyone has any objections, please let me know. > > > > ------------------ > > > > From: Nadav Har'El <nyh@xxxxxxxxxx> > > > > commit bfd0a56b90005f8c8a004baf407ad90045c2b11e upstream. > > > > If we let L1 use EPT, we should probably also support the INVEPT instruction. 
> > > > In our current nested EPT implementation, when L1 changes its EPT table > > for L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in > > the course of this modification already calls INVEPT. But if the last level > > of the shadow page is unsync, not all of L1's changes to EPT12 are intercepted, > > which means the roots need to be synced when L1 calls INVEPT. A global INVEPT > > should be no different, since roots are synced by kvm_mmu_load() each > > time EPTP02 changes. > > > > Reviewed-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx> > > Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx> > > Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx> > > Signed-off-by: Xinhao Xu <xinhao.xu@xxxxxxxxx> > > Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > > Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx> > > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > > [bwh: Backported to 3.2: > > - Adjust context, filename > > - Add definition of nested_ept_get_cr3(), added upstream by commit > > 155a97a3d7c7 ("nEPT: MMU context for nested EPT")] > > Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx> > > --- > > --- a/arch/x86/include/asm/vmx.h > > +++ b/arch/x86/include/asm/vmx.h > > @@ -279,6 +279,7 @@ enum vmcs_field { > > #define EXIT_REASON_APIC_ACCESS 44 > > #define EXIT_REASON_EPT_VIOLATION 48 > > #define EXIT_REASON_EPT_MISCONFIG 49 > > +#define EXIT_REASON_INVEPT 50 > > #define EXIT_REASON_WBINVD 54 > > #define EXIT_REASON_XSETBV 55 > > > > @@ -397,6 +398,7 @@ enum vmcs_field { > > #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 > > #define VMX_EPT_EXTENT_CONTEXT 1 > > #define VMX_EPT_EXTENT_GLOBAL 2 > > +#define VMX_EPT_EXTENT_SHIFT 24 > > > > #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) > > #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) > > @@ -404,6 +406,7 @@ enum vmcs_field { > > #define VMX_EPTP_WB_BIT (1ull << 14) > > #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) > > #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) > > +#define VMX_EPT_INVEPT_BIT (1ull << 20) > > #define 
VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) > > #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) > > #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) > > --- a/arch/x86/kvm/mmu.c > > +++ b/arch/x86/kvm/mmu.c > > @@ -2869,6 +2869,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu > > mmu_sync_roots(vcpu); > > spin_unlock(&vcpu->kvm->mmu_lock); > > } > > +EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); > > > > static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, > > u32 access, struct x86_exception *exception) > > @@ -3131,6 +3132,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu * > > ++vcpu->stat.tlb_flush; > > kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > } > > +EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); > > > > static void paging_new_cr3(struct kvm_vcpu *vcpu) > > { > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -602,6 +602,7 @@ static void nested_release_page_clean(st > > kvm_release_page_clean(page); > > } > > > > +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); > > static u64 construct_eptp(unsigned long root_hpa); > > static void kvm_cpu_vmxon(u64 addr); > > static void kvm_cpu_vmxoff(void); > > @@ -1899,6 +1900,7 @@ static u32 nested_vmx_secondary_ctls_low > > static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; > > static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; > > static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; > > +static u32 nested_vmx_ept_caps; > > static __init void nested_vmx_setup_ctls_msrs(void) > > { > > /* > > @@ -5550,6 +5552,74 @@ static int handle_vmptrst(struct kvm_vcp > > return 1; > > } > > > > +/* Emulate the INVEPT instruction */ > > +static int handle_invept(struct kvm_vcpu *vcpu) > > +{ > > + u32 vmx_instruction_info, types; > > + unsigned long type; > > + gva_t gva; > > + struct x86_exception e; > > + struct { > > + u64 eptp, gpa; > > + } operand; > > + u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK; > > + > > + if (!(nested_vmx_secondary_ctls_high & 
SECONDARY_EXEC_ENABLE_EPT) || > > + !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { > > + kvm_queue_exception(vcpu, UD_VECTOR); > > + return 1; > > + } > > + > > + if (!nested_vmx_check_permission(vcpu)) > > + return 1; > > + > > + if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { > > + kvm_queue_exception(vcpu, UD_VECTOR); > > + return 1; > > + } > > + > > + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > > + type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); > > + > > + types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; > > + > > + if (!(types & (1UL << type))) { > > + nested_vmx_failValid(vcpu, > > + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); > > + return 1; > > + } > > + > > + /* According to the Intel VMX instruction reference, the memory > > + * operand is read even if it isn't needed (e.g., for type==global) > > + */ > > + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), > > + vmx_instruction_info, &gva)) > > + return 1; > > + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, > > + sizeof(operand), &e)) { > > + kvm_inject_page_fault(vcpu, &e); > > + return 1; > > + } > > + > > + switch (type) { > > + case VMX_EPT_EXTENT_CONTEXT: > > + if ((operand.eptp & eptp_mask) != > > + (nested_ept_get_cr3(vcpu) & eptp_mask)) > > + break; > > + case VMX_EPT_EXTENT_GLOBAL: > > + kvm_mmu_sync_roots(vcpu); > > + kvm_mmu_flush_tlb(vcpu); > > + nested_vmx_succeed(vcpu); > > + break; > > + default: > > + BUG_ON(1); > > + break; > > + } > > + > > + skip_emulated_instruction(vcpu); > > + return 1; > > +} > > + > > /* > > * The exit handlers return 1 if the exit was handled fully and guest execution > > * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs > > @@ -5591,6 +5661,7 @@ static int (*kvm_vmx_exit_handlers[])(st > > [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, > > [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, > > [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, > > + [EXIT_REASON_INVEPT] = handle_invept, > > }; > > > > static const int kvm_vmx_max_exit_handlers = > > @@ -5775,6 +5846,7 @@ static bool nested_vmx_exit_handled(stru > > case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: > > case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: > > case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: > > + case EXIT_REASON_INVEPT: > > /* > > * VMX instructions trap unconditionally. This allows L1 to > > * emulate them for its L2 guest, i.e., allows 3-level nesting! > > @@ -6436,6 +6508,12 @@ static void vmx_set_supported_cpuid(u32 > > entry->ecx |= bit(X86_FEATURE_VMX); > > } > > > > +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) > > +{ > > + /* return the page table to be shadowed - in our case, EPT12 */ > > + return get_vmcs12(vcpu)->ept_pointer; > > +} > > + > > /* > > * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested > > * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it > > -- Ben Hutchings Power corrupts. Absolute power is kind of neat. - John Lehman, Secretary of the US Navy 1981-1987
Attachment:
signature.asc
Description: This is a digitally signed message part