Re: [PATCH 3.2 087/102] nEPT: Nested INVEPT

Paolo Bonzini <pbonzini@xxxxxxxxxx> · Sun, 02 Nov 2014 10:03:26 +0100

You can just use the same scheme as your patch 88/102:

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 685b8448d6e2..bd8cc9055fe2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6740,6 +6740,12 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -6785,6 +6791,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_invalid_op,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+	[EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7020,6 +7027,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+	case EXIT_REASON_INVEPT:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!


Paolo

On 01/11/2014 23:28, Ben Hutchings wrote:
> 3.2.64-rc1 review patch.  If anyone has any objections, please let me know.
> 
> ------------------
> 
> From: Nadav Har'El <nyh@xxxxxxxxxx>
> 
> commit bfd0a56b90005f8c8a004baf407ad90045c2b11e upstream.
> 
> If we let L1 use EPT, we should probably also support the INVEPT instruction.
> 
> In our current nested EPT implementation, when L1 changes its EPT table
> for L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in
> the course of this modification already calls INVEPT. But if last level
> of shadow page is unsync not all L1's changes to EPT12 are intercepted,
> which means roots need to be synced when L1 calls INVEPT. Global INVEPT
> should not be different since roots are synced by kvm_mmu_load() each
> time EPTP02 changes.
> 
> Reviewed-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
> Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> Signed-off-by: Xinhao Xu <xinhao.xu@xxxxxxxxx>
> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> [bwh: Backported to 3.2:
>  - Adjust context, filename
>  - Add definition of nested_ept_get_cr3(), added upstream by commit
>    155a97a3d7c7 ("nEPT: MMU context for nested EPT")]
> Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
> ---
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -279,6 +279,7 @@ enum vmcs_field {
>  #define EXIT_REASON_APIC_ACCESS         44
>  #define EXIT_REASON_EPT_VIOLATION       48
>  #define EXIT_REASON_EPT_MISCONFIG       49
> +#define EXIT_REASON_INVEPT              50
>  #define EXIT_REASON_WBINVD		54
>  #define EXIT_REASON_XSETBV		55
>  
> @@ -397,6 +398,7 @@ enum vmcs_field {
>  #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
>  #define VMX_EPT_EXTENT_CONTEXT			1
>  #define VMX_EPT_EXTENT_GLOBAL			2
> +#define VMX_EPT_EXTENT_SHIFT			24
>  
>  #define VMX_EPT_EXECUTE_ONLY_BIT		(1ull)
>  #define VMX_EPT_PAGE_WALK_4_BIT			(1ull << 6)
> @@ -404,6 +406,7 @@ enum vmcs_field {
>  #define VMX_EPTP_WB_BIT				(1ull << 14)
>  #define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
>  #define VMX_EPT_1GB_PAGE_BIT			(1ull << 17)
> +#define VMX_EPT_INVEPT_BIT			(1ull << 20)
>  #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
>  #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
>  #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2869,6 +2869,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu
>  	mmu_sync_roots(vcpu);
>  	spin_unlock(&vcpu->kvm->mmu_lock);
>  }
> +EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
>  
>  static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
>  				  u32 access, struct x86_exception *exception)
> @@ -3131,6 +3132,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *
>  	++vcpu->stat.tlb_flush;
>  	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
>  }
> +EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
>  
>  static void paging_new_cr3(struct kvm_vcpu *vcpu)
>  {
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -602,6 +602,7 @@ static void nested_release_page_clean(st
>  	kvm_release_page_clean(page);
>  }
>  
> +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
>  static u64 construct_eptp(unsigned long root_hpa);
>  static void kvm_cpu_vmxon(u64 addr);
>  static void kvm_cpu_vmxoff(void);
> @@ -1899,6 +1900,7 @@ static u32 nested_vmx_secondary_ctls_low
>  static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
>  static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
>  static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
> +static u32 nested_vmx_ept_caps;
>  static __init void nested_vmx_setup_ctls_msrs(void)
>  {
>  	/*
> @@ -5550,6 +5552,74 @@ static int handle_vmptrst(struct kvm_vcp
>  	return 1;
>  }
>  
> +/* Emulate the INVEPT instruction */
> +static int handle_invept(struct kvm_vcpu *vcpu)
> +{
> +	u32 vmx_instruction_info, types;
> +	unsigned long type;
> +	gva_t gva;
> +	struct x86_exception e;
> +	struct {
> +		u64 eptp, gpa;
> +	} operand;
> +	u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;
> +
> +	if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
> +	    !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +		return 1;
> +	}
> +
> +	if (!nested_vmx_check_permission(vcpu))
> +		return 1;
> +
> +	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +		return 1;
> +	}
> +
> +	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
> +
> +	types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
> +
> +	if (!(types & (1UL << type))) {
> +		nested_vmx_failValid(vcpu,
> +				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
> +		return 1;
> +	}
> +
> +	/* According to the Intel VMX instruction reference, the memory
> +	 * operand is read even if it isn't needed (e.g., for type==global)
> +	 */
> +	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
> +			vmx_instruction_info, &gva))
> +		return 1;
> +	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
> +				sizeof(operand), &e)) {
> +		kvm_inject_page_fault(vcpu, &e);
> +		return 1;
> +	}
> +
> +	switch (type) {
> +	case VMX_EPT_EXTENT_CONTEXT:
> +		if ((operand.eptp & eptp_mask) !=
> +				(nested_ept_get_cr3(vcpu) & eptp_mask))
> +			break;
> +	case VMX_EPT_EXTENT_GLOBAL:
> +		kvm_mmu_sync_roots(vcpu);
> +		kvm_mmu_flush_tlb(vcpu);
> +		nested_vmx_succeed(vcpu);
> +		break;
> +	default:
> +		BUG_ON(1);
> +		break;
> +	}
> +
> +	skip_emulated_instruction(vcpu);
> +	return 1;
> +}
> +
>  /*
>   * The exit handlers return 1 if the exit was handled fully and guest execution
>   * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
> @@ -5591,6 +5661,7 @@ static int (*kvm_vmx_exit_handlers[])(st
>  	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
>  	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_invalid_op,
>  	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
> +	[EXIT_REASON_INVEPT]                  = handle_invept,
>  };
>  
>  static const int kvm_vmx_max_exit_handlers =
> @@ -5775,6 +5846,7 @@ static bool nested_vmx_exit_handled(stru
>  	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
>  	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
>  	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
> +	case EXIT_REASON_INVEPT:
>  		/*
>  		 * VMX instructions trap unconditionally. This allows L1 to
>  		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
> @@ -6436,6 +6508,12 @@ static void vmx_set_supported_cpuid(u32
>  		entry->ecx |= bit(X86_FEATURE_VMX);
>  }
>  
> +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
> +{
> +	/* return the page table to be shadowed - in our case, EPT12 */
> +	return get_vmcs12(vcpu)->ept_pointer;
> +}
> +
>  /*
>   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
>   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
> 
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html