Re: [PATCH v3 4/4] x86/kvm: add boot parameter for setting max number of vcpus per guest

Sean Christopherson <seanjc@xxxxxxxxxx> · Wed, 17 Nov 2021 20:57:01 +0000

On Tue, Nov 16, 2021, Juergen Gross wrote:
> Today the maximum number of vcpus of a kvm guest is set via a #define
> in a header file.
> 
> In order to support higher vcpu numbers for guests without generally
> increasing the memory consumption of guests on the host especially on
> very large systems add a boot parameter for specifying the number of
> allowed vcpus for guests.
> 
> The default will still be the current setting of 1024. The value 0 has
> the special meaning to limit the number of possible vcpus to the
> number of possible cpus of the host.
> 
> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
> ---
> V3:
> - rebase
> ---
>  Documentation/admin-guide/kernel-parameters.txt | 7 +++++++
>  arch/x86/include/asm/kvm_host.h                 | 5 ++++-
>  arch/x86/kvm/x86.c                              | 9 ++++++++-
>  3 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index e269c3f66ba4..409a72c2d91b 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2445,6 +2445,13 @@
>  			feature (tagged TLBs) on capable Intel chips.
>  			Default is 1 (enabled)
>  
> +	kvm.max_vcpus=	[KVM,X86] Set the maximum allowed numbers of vcpus per
> +			guest. The special value 0 sets the limit to the number
> +			of physical cpus possible on the host (including not
> +			yet hotplugged cpus). Higher values will result in
> +			slightly higher memory consumption per guest.
> +			Default: 1024

Rather than make this a module param, I would prefer to start with the below
patch (originally from TDX pre-enabling) and then wire up a way for userspace to
_lower_ the max on a per-VM basis, e.g. add a capability.

VMs largely fall into two categories: (1) the max number of vCPUs is known prior
to VM creation, or (2) the max number of vCPUs is unbounded (up to KVM's hard
limit), e.g. for container-style use cases where "vCPUs" are created on-demand in
response to the "guest" creating a new task.

For #1, a per-VM control lets userspace lower the limit to the bare minimum.  For
#2, neither the module param nor the per-VM control is likely to be useful, but
a per-VM control does let mixed environments (both #1 and #2 VMs) lower the limits
for compatible VMs, whereas a module param must be set to the max of any potential VM.

From 0593cb4f73a6c3f0862f9411f0e14f00671f59ae Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Date: Fri, 2 Jul 2021 15:04:27 -0700
Subject: [PATCH] KVM: Add max_vcpus field in common 'struct kvm'

Move arm's per-VM max_vcpus field into the generic "struct kvm", and use
it to check created_vcpus in the generic code instead of checking only
the hardcoded absolute KVM-wide max.  x86 TDX guests will reuse the
generic check verbatim, as the max number of vCPUs for a TDX guest is
user defined at VM creation and immutable thereafter.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/arm64/include/asm/kvm_host.h | 3 ---
 arch/arm64/kvm/arm.c              | 7 ++-----
 arch/arm64/kvm/vgic/vgic-init.c   | 6 +++---
 include/linux/kvm_host.h          | 1 +
 virt/kvm/kvm_main.c               | 3 ++-
 5 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4be8486042a7..b51e1aa6ae27 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -108,9 +108,6 @@ struct kvm_arch {
 	/* VTCR_EL2 value for this VM */
 	u64    vtcr;

-	/* The maximum number of vCPUs depends on the used GIC model */
-	int max_vcpus;
-
 	/* Interrupt controller */
 	struct vgic_dist	vgic;

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f5490afe1ebf..97c3b83235b4 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -153,7 +153,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_vgic_early_init(kvm);

 	/* The maximum number of VCPUs is limited by the host's GIC model */
-	kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
+	kvm->max_vcpus = kvm_arm_default_max_vcpus();

 	set_default_spectre(kvm);

@@ -228,7 +228,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MAX_VCPUS:
 	case KVM_CAP_MAX_VCPU_ID:
 		if (kvm)
-			r = kvm->arch.max_vcpus;
+			r = kvm->max_vcpus;
 		else
 			r = kvm_arm_default_max_vcpus();
 		break;
@@ -304,9 +304,6 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
 		return -EBUSY;

-	if (id >= kvm->arch.max_vcpus)
-		return -EINVAL;
-
 	return 0;
 }

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 0a06d0648970..906aee52f2bc 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -97,11 +97,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 	ret = 0;

 	if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
-		kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
+		kvm->max_vcpus = VGIC_V2_MAX_CPUS;
 	else
-		kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
+		kvm->max_vcpus = VGIC_V3_MAX_CPUS;

-	if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
+	if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
 		ret = -E2BIG;
 		goto out_unlock;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 60a35d9fe259..5f56516e2f5a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -566,6 +566,7 @@ struct kvm {
 	 * and is accessed atomically.
 	 */
 	atomic_t online_vcpus;
+	int max_vcpus;
 	int created_vcpus;
 	int last_boosted_vcpu;
 	struct list_head vm_list;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3f6d450355f0..e509b963651c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1052,6 +1052,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	rcuwait_init(&kvm->mn_memslots_update_rcuwait);

 	INIT_LIST_HEAD(&kvm->devices);
+	kvm->max_vcpus = KVM_MAX_VCPUS;

 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);

@@ -3599,7 +3600,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 		return -EINVAL;

 	mutex_lock(&kvm->lock);
-	if (kvm->created_vcpus == KVM_MAX_VCPUS) {
+	if (kvm->created_vcpus >= kvm->max_vcpus) {
 		mutex_unlock(&kvm->lock);
 		return -EINVAL;
 	}
--
2.34.0.rc1.387.gb447b232ab-goog