On Fri, 2022-02-25 at 16:22 +0800, Zeng Guang wrote:
> Currently KVM allocates 8 pages in advance for the Posted Interrupt
> Descriptor pointer (PID-pointer) table, to accommodate vCPUs with
> APIC IDs up to KVM_MAX_VCPU_IDS - 1. This policy wastes some memory
> because most VMs have fewer than 512 vCPUs and thus need only one
> page.
>
> If the user hypervisor specifies the maximum practical vCPU ID prior
> to vCPU creation, IPIv can allocate only the essential memory for the
> PID-pointer table and reduce the memory footprint of VMs.
>
> Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Zeng Guang <guang.zeng@xxxxxxxxx>
> ---
>  arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0cb141c277ef..22bfb4953289 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -230,9 +230,6 @@ static const struct {
>  };
>
>  #define L1D_CACHE_ORDER 4
> -
> -/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
> -#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
>  #define PID_TABLE_ENTRY_VALID 1
>
>  static void *vmx_l1d_flush_pages;
> @@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>          return exec_control;
>  }
>
> +static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
> +{
> +        struct page *pages;
> +
> +        if(kvm_vmx->pid_table)
> +                return 0;
> +
> +        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> +                get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
> +
> +        if (!pages)
> +                return -ENOMEM;
> +
> +        kvm_vmx->pid_table = (void *)page_address(pages);
> +        kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
> +        return 0;
> +}
> +
>  #define VMX_XSS_EXIT_BITMAP 0
>
>  static void init_vmcs(struct vcpu_vmx *vmx)
> @@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
>                  goto free_vmcs;
>          }
>
> +        if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
> +                struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
> +
> +                mutex_lock(&vcpu->kvm->lock);
> +                err = vmx_alloc_pid_table(kvm_vmx);
> +                mutex_unlock(&vcpu->kvm->lock);
> +                if (err)
> +                        goto free_vmcs;
> +        }

This could be dangerous. If APICv is temporarily inhibited, this code
won't run and we will end up without a PID table. I think that
kvm_vcpu_apicv_active() should just be dropped from this condition.

Best regards,
        Maxim Levitsky

> +
>          return 0;
>
>  free_vmcs:
> @@ -7202,17 +7227,6 @@ static int vmx_vm_init(struct kvm *kvm)
>                  }
>          }
>
> -        if (enable_ipiv) {
> -                struct page *pages;
> -
> -                pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
> -                if (!pages)
> -                        return -ENOMEM;
> -
> -                to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
> -                to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
> -        }
> -
>          return 0;
>  }
>
> @@ -7809,7 +7823,8 @@ static void vmx_vm_destroy(struct kvm *kvm)
>          struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
>
>          if (kvm_vmx->pid_table)
> -                free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
> +                free_pages((unsigned long)kvm_vmx->pid_table,
> +                        get_order((kvm_vmx->pid_last_index + 1) * sizeof(u64)));
>  }
>
>  static struct kvm_x86_ops vmx_x86_ops __initdata = {
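
Concretely, dropping kvm_vcpu_apicv_active() from the condition as
suggested above would make the vmx_create_vcpu() hunk look roughly like
this (an untested sketch of the quoted code, not a proposed patch):

        if (enable_ipiv) {
                struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);

                /*
                 * Allocate the PID table whenever IPIv is enabled, even
                 * if APICv happens to be inhibited right now: inhibits
                 * are transient, and the table must already exist when
                 * APICv is re-activated.
                 */
                mutex_lock(&vcpu->kvm->lock);
                err = vmx_alloc_pid_table(kvm_vmx);
                mutex_unlock(&vcpu->kvm->lock);
                if (err)
                        goto free_vmcs;
        }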
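
For reference, the sizing arithmetic behind the commit message's "8
pages" vs. "one page" figures can be checked with a small user-space
sketch. It assumes 4 KiB pages and KVM_MAX_VCPU_IDS == 4096 (consistent
with the 8-page figure quoted above), and re-implements the kernel's
get_order() locally since that helper is not available in user space:

/* User-space sketch of the PID-pointer table sizing math. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE        4096UL   /* assumed x86 page size */
#define KVM_MAX_VCPU_IDS 4096UL   /* assumed value for this series */

/* Local stand-in for the kernel's get_order(): ceil(log2(pages)). */
static unsigned int get_order(unsigned long size)
{
        unsigned long pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
        unsigned int order = 0;

        while ((1UL << order) < pages)
                order++;
        return order;
}

int main(void)
{
        /* Old policy: size the table for the architectural maximum. */
        unsigned long worst = KVM_MAX_VCPU_IDS * sizeof(uint64_t);
        /* New policy: size it for a VM that declared max_vcpu_id = 512. */
        unsigned long typical = 512 * sizeof(uint64_t);

        printf("worst case: %lu bytes -> order %u (%lu pages)\n",
               worst, get_order(worst), 1UL << get_order(worst));
        printf("512 vCPUs:  %lu bytes -> order %u (%lu pages)\n",
               typical, get_order(typical), 1UL << get_order(typical));
        return 0;
}

This prints 32768 bytes -> order 3 (8 pages) for the worst case and
4096 bytes -> order 0 (1 page) for a 512-vCPU VM, matching the memory
saving the commit message describes.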