On Fri, 2022-02-25 at 16:22 +0800, Zeng Guang wrote:
> Currently KVM allocates 8 pages in advance for the Posted Interrupt
> Descriptor pointer (PID-pointer) table, to accommodate vCPUs with
> APIC IDs up to KVM_MAX_VCPU_IDS - 1. This policy wastes some memory
> because most VMs have fewer than 512 vCPUs and thus need only one
> page.
>
> If the user hypervisor specifies the maximum practical vCPU ID prior
> to vCPU creation, IPIv can allocate only the essential memory for the
> PID-pointer table and reduce the memory footprint of VMs.
>
> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Zeng Guang <guang.zeng@xxxxxxxxx>
> ---
>  arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0cb141c277ef..22bfb4953289 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -230,9 +230,6 @@ static const struct {
>  };
>
>  #define L1D_CACHE_ORDER 4
> -
> -/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
> -#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
>  #define PID_TABLE_ENTRY_VALID 1
>
>  static void *vmx_l1d_flush_pages;
> @@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>          return exec_control;
>  }
>
> +static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
> +{
> +        struct page *pages;
> +
> +        if(kvm_vmx->pid_table)
> +                return 0;
> +
> +        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> +                get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
> +
> +        if (!pages)
> +                return -ENOMEM;
> +
> +        kvm_vmx->pid_table = (void *)page_address(pages);
> +        kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
> +        return 0;
> +}
> +
>  #define VMX_XSS_EXIT_BITMAP 0
>
>  static void init_vmcs(struct vcpu_vmx *vmx)
> @@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
>                  goto free_vmcs;
>          }
>
> +        if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
> +                struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
> +
> +                mutex_lock(&vcpu->kvm->lock);
> +                err = vmx_alloc_pid_table(kvm_vmx);
> +                mutex_unlock(&vcpu->kvm->lock);
> +                if (err)
> +                        goto free_vmcs;
> +        }

This could be dangerous. If APICv is temporarily inhibited, this code
won't run and we will end up without a PID table. I think that
kvm_vcpu_apicv_active() should just be dropped from this condition.

Best regards,
        Maxim Levitsky

> +
>          return 0;
>
>  free_vmcs:
> @@ -7202,17 +7227,6 @@ static int vmx_vm_init(struct kvm *kvm)
>                  }
>          }
>
> -        if (enable_ipiv) {
> -                struct page *pages;
> -
> -                pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
> -                if (!pages)
> -                        return -ENOMEM;
> -
> -                to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
> -                to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
> -        }
> -
>          return 0;
>  }
>
> @@ -7809,7 +7823,8 @@ static void vmx_vm_destroy(struct kvm *kvm)
>          struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
>
>          if (kvm_vmx->pid_table)
> -                free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
> +                free_pages((unsigned long)kvm_vmx->pid_table,
> +                        get_order((kvm_vmx->pid_last_index + 1) * sizeof(u64)));
>  }
>
>  static struct kvm_x86_ops vmx_x86_ops __initdata = {
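
Concretely, dropping kvm_vcpu_apicv_active() from the condition as
suggested above would make the vmx_create_vcpu() hunk look roughly like
this (an untested sketch of the quoted code, not a proposed patch):

        if (enable_ipiv) {
                struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);

                /*
                 * Allocate the PID table whenever IPIv is enabled, even
                 * if APICv happens to be inhibited right now: inhibits
                 * are transient, and the table must already exist when
                 * APICv is re-activated.
                 */
                mutex_lock(&vcpu->kvm->lock);
                err = vmx_alloc_pid_table(kvm_vmx);
                mutex_unlock(&vcpu->kvm->lock);
                if (err)
                        goto free_vmcs;
        }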
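
For reference, the sizing arithmetic behind the commit message's "8
pages" vs. "one page" figures can be checked with a small user-space
sketch. It assumes 4 KiB pages and KVM_MAX_VCPU_IDS == 4096 (consistent
with the 8-page figure quoted above), and re-implements the kernel's
get_order() locally since that helper is not available in user space:

/* User-space sketch of the PID-pointer table sizing math. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE        4096UL   /* assumed x86 page size */
#define KVM_MAX_VCPU_IDS 4096UL   /* assumed value for this series */

/* Local stand-in for the kernel's get_order(): ceil(log2(pages)). */
static unsigned int get_order(unsigned long size)
{
        unsigned long pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
        unsigned int order = 0;

        while ((1UL << order) < pages)
                order++;
        return order;
}

int main(void)
{
        /* Old policy: size the table for the architectural maximum. */
        unsigned long worst = KVM_MAX_VCPU_IDS * sizeof(uint64_t);
        /* New policy: size it for a VM that declared max_vcpu_id = 512. */
        unsigned long typical = 512 * sizeof(uint64_t);

        printf("worst case: %lu bytes -> order %u (%lu pages)\n",
               worst, get_order(worst), 1UL << get_order(worst));
        printf("512 vCPUs:  %lu bytes -> order %u (%lu pages)\n",
               typical, get_order(typical), 1UL << get_order(typical));
        return 0;
}

This prints 32768 bytes -> order 3 (8 pages) for the worst case and
4096 bytes -> order 0 (1 page) for a 512-vCPU VM, matching the memory
saving the commit message describes.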