Hi Peter,

Peter Feiner <pfeiner@xxxxxxxxxx> writes:

> Between loading the new VMCS and enabling PML, the CPU was unpinned.
> If the vCPU thread were migrated to another CPU in the interim (e.g.,
> due to preemption or sleeping alloc_page), then the VMWRITEs to enable
> PML would target the wrong VMCS -- or no VMCS at all:
>
> [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
> [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
> [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)
>
> This patch ensures that the VMCS remains current while enabling PML by
> doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
> is hoisted out of the critical section.
>
> Signed-off-by: Peter Feiner <pfeiner@xxxxxxxxxx>
> ---
>  arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++----------------------------
>  1 file changed, 22 insertions(+), 28 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 64a79f2..6d3a8d7 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4876,6 +4876,7 @@ static void ept_set_mmio_spte_mask(void)
>  }
>
>  #define VMX_XSS_EXIT_BITMAP 0
> +

Stray space added here.

>  /*
>   * Sets up the vmcs for emulated real mode.
>   */
> @@ -4979,6 +4980,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	if (vmx_xsaves_supported())
>  		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
>
> +	if (enable_pml) {
> +		ASSERT(vmx->pml_pg);
> +		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> +		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +	}
> +
>  	return 0;
>  }
>
> @@ -7937,22 +7944,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>  	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>  }
>
> -static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
> -{
> -	struct page *pml_pg;
> -
> -	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -	if (!pml_pg)
> -		return -ENOMEM;
> -
> -	vmx->pml_pg = pml_pg;
> -
> -	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> -	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> -
> -	return 0;
> -}
> -
>  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
>  {
>  	if (vmx->pml_pg) {
> @@ -8881,6 +8872,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>
>  	vmx->vpid = allocate_vpid();
>
> +	/*
> +	 * If PML is turned on, failure on enabling PML just results in failure
> +	 * of creating the vcpu, therefore we can simplify PML logic (by
> +	 * avoiding dealing with cases, such as enabling PML partially on vcpus
> +	 * for the guest, etc.
> +	 */
> +	if (enable_pml) {
> +		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +		err = -ENOMEM;

Since err is being set to -ENOMEM after the call to BUILD_BUG_ON a few
lines below, you could just move this block before the vmx->guest_msrs
check and drop the duplicate assignment.
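Something like this, perhaps (untested, and I'm paraphrasing the
surrounding guest_msrs lines from memory, so take the exact context with a
grain of salt):

	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
		     > PAGE_SIZE);
	err = -ENOMEM;

	/* Reuses the err = -ENOMEM already set for the guest_msrs check. */
	if (enable_pml) {
		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!vmx->pml_pg)
			goto free_msrs;		/* kfree(NULL) is fine even
						 * if the kmalloc failed too */
	}

	if (!vmx->guest_msrs)
		goto uninit_vcpu;	/* pml_pg is freed at free_vcpu */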
> +		if (!vmx->pml_pg)
> +			goto free_vcpu;
> +	}
> +
>  	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
>  	if (err)
>  		goto free_vcpu;
> @@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	vmx->nested.current_vmptr = -1ull;
>  	vmx->nested.current_vmcs12 = NULL;
>
> -	/*
> -	 * If PML is turned on, failure on enabling PML just results in failure
> -	 * of creating the vcpu, therefore we can simplify PML logic (by
> -	 * avoiding dealing with cases, such as enabling PML partially on vcpus
> -	 * for the guest, etc.
> -	 */
> -	if (enable_pml) {
> -		err = vmx_create_pml_buffer(vmx);
> -		if (err)
> -			goto free_vmcs;
> -	}
> -
>  	return &vmx->vcpu;
>
> free_vmcs:
> @@ -8958,6 +8950,8 @@ free_msrs:
> uninit_vcpu:
>  	kvm_vcpu_uninit(&vmx->vcpu);
> free_vcpu:
> +	if (vmx->pml_pg)
> +		__free_page(vmx->pml_pg);
>  	free_vpid(vmx->vpid);
>  	kmem_cache_free(kvm_vcpu_cache, vmx);
>  	return ERR_PTR(err);

If we move the __free_page() call under the uninit_vcpu label, we can
remove the if() check here.
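I.e., something like this (untested; it assumes the allocation also moves
after kvm_vcpu_init() as suggested above, so the kvm_vcpu_init() failure
path can't leak the page, and it reuses vmx_destroy_pml_buffer(), which
already does the NULL check internally):

uninit_vcpu:
	kvm_vcpu_uninit(&vmx->vcpu);
	/* NULL-safe: vmx_destroy_pml_buffer() checks vmx->pml_pg itself */
	vmx_destroy_pml_buffer(vmx);
free_vcpu:
	free_vpid(vmx->vpid);
	kmem_cache_free(kvm_vcpu_cache, vmx);
	return ERR_PTR(err);

Bandan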