Re: [PATCH v3 05/13] nEPT: MMU context for nested EPT

Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx> · Tue, 21 May 2013 16:50:58 +0800

On 05/19/2013 12:52 PM, Jun Nakajima wrote:
> From: Nadav Har'El <nyh@xxxxxxxxxx>
> 
> KVM's existing shadow MMU code already supports nested TDP. To use it, we
> need to set up a new "MMU context" for nested EPT, and create a few callbacks
> for it (nested_ept_*()). This context should also use the EPT versions of
> the page table access functions (defined in the previous patch).
> Then, we need to switch back and forth between this nested context and the
> regular MMU context when switching between L1 and L2 (when L1 runs this L2
> with EPT).
> 
> Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx>
> Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
> Signed-off-by: Xinhao Xu <xinhao.xu@xxxxxxxxx>
> ---
>  arch/x86/kvm/mmu.c | 38 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/mmu.h |  1 +
>  arch/x86/kvm/vmx.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 92 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 6c1670f..37f8d7f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3653,6 +3653,44 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
>  }
>  EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
> 
> +int kvm_init_shadow_EPT_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
> +{
> +	ASSERT(vcpu);
> +	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
> +
> +	context->shadow_root_level = kvm_x86_ops->get_tdp_level();

Does that mean the L1 guest always uses a page-walk length of 4? In your
previous patch, however, it can be 2.

> +
> +	context->nx = is_nx(vcpu); /* TODO: ? */

Hmm? EPT always supports NX.

> +	context->new_cr3 = paging_new_cr3;
> +	context->page_fault = EPT_page_fault;
> +	context->gva_to_gpa = EPT_gva_to_gpa;
> +	context->sync_page = EPT_sync_page;
> +	context->invlpg = EPT_invlpg;
> +	context->update_pte = EPT_update_pte;
> +	context->free = paging_free;
> +	context->root_level = context->shadow_root_level;
> +	context->root_hpa = INVALID_PAGE;
> +	context->direct_map = false;
> +
> +	/* TODO: reset_rsvds_bits_mask() is not built for EPT, we need
> +	   something different.
> +	 */

Exactly. :)

> +	reset_rsvds_bits_mask(vcpu, context);
> +
> +
> +	/* TODO: I copied these from kvm_init_shadow_mmu, I don't know why
> +	   they are done, or why they write to vcpu->arch.mmu and not context
> +	 */
> +	vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
> +	vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
> +	vcpu->arch.mmu.base_role.smep_andnot_wp =
> +		kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) &&
> +		!is_write_protection(vcpu);

I guess we need not care about these, since the permissions of an EPT page do
not depend on them.

> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_init_shadow_EPT_mmu);
> +
>  static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
>  {
>  	int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 2adcbc2..8fc94dd 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -54,6 +54,7 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
>  int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
>  int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
> +int kvm_init_shadow_EPT_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
> 
>  static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
>  {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index fb9cae5..a88432f 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1045,6 +1045,11 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
>  	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
>  }
> 
> +static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
> +{
> +	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
> +}
> +
>  static inline bool is_exception(u32 intr_info)
>  {
>  	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
> @@ -7311,6 +7316,46 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
>  		entry->ecx |= bit(X86_FEATURE_VMX);
>  }
> 
> +/* Callbacks for nested_ept_init_mmu_context: */
> +
> +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
> +{
> +	/* return the page table to be shadowed - in our case, EPT12 */
> +	return get_vmcs12(vcpu)->ept_pointer;
> +}
> +
> +static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
> +	struct x86_exception *fault)
> +{
> +	struct vmcs12 *vmcs12;
> +	nested_vmx_vmexit(vcpu);
> +	vmcs12 = get_vmcs12(vcpu);
> +	/*
> +	 * Note no need to set vmcs12->vm_exit_reason as it is already copied
> +	 * from vmcs02 in nested_vmx_vmexit() above, i.e., EPT_VIOLATION.
> +	 */
> +	vmcs12->exit_qualification = fault->error_code;

Hmm, you directly copy the error code from FNAME(walk_addr_generic) into the
exit qualification, but the two formats are different, and I did not see you
convert the error code in the previous patches.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html