On Wed, Jun 14, 2023 at 09:41:58AM +0000,
"Huang, Kai" <kai.huang@xxxxxxxxx> wrote:
> On Tue, 2023-06-13 at 10:38 -0700, Isaku Yamahata wrote:
> > On Mon, Jun 12, 2023 at 11:55:14PM +0000,
> > "Huang, Kai" <kai.huang@xxxxxxxxx> wrote:
> > 
> > > On Wed, 2023-06-07 at 11:06 -0700, Isaku Yamahata wrote:
> > > > Thanks for pointing it out. The following is the fix.
> > > > 
> > > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > > > index 8a1d0755d275..b0d3f646afb1 100644
> > > > --- a/arch/x86/kvm/vmx/tdx.c
> > > > +++ b/arch/x86/kvm/vmx/tdx.c
> > > > @@ -4499,26 +4499,39 @@ u64 tdx_non_arch_field_switch(u64 field)
> > > >          }
> > > >  }
> > > > 
> > > > -static void __init vmx_tdx_on(void *info)
> > > > +struct vmx_tdx_enabled {
> > > > +        cpumask_var_t vmx_enabled;
> > > > +        atomic_t *err;
> > > > +};
> > > > +
> > > 
> > > Sorry for late reply.
> > > 
> > > I think you just need to mimic hardware_enable_all() -- using a per-cpu
> > > variable.  In this way you can get rid of this structure.
> > > 
> > > But again, we have listed a couple of options in the v13 discussion [1]:
> > > 
> > > 1) Call kvm_ops_update() twice before and after hardware_setup() in order
> > > to use hardware_enable_all() directly.
> > > 
> > > 2) Expose kvm_x86_ops as symbol so VMX can set hardware_{enable|disable}()
> > > callback before hardware_setup() in order to use hardware_enable_all().
> > > 
> > > 3) Implement VMX's own hardware_enable_all() logic as shown in this patch.
> > > 
> > > 4) ???
> > > 
> > > I think it would be better if Sean can provide some comments here, but
> > > until he does, we can keep using option 3) (this patch).
> > > 
> > > [1] https://lore.kernel.org/lkml/5dc84a2601a47ccc29ef43200cf3ec0d1b485d23.camel@xxxxxxxxx/
> > 
> > Ok, makes sense. Here is the updated version with the fix for the error you
> > pointed out.  Introduce cpu bitmap to track which cpu enable VMX(VMXON)
> > successfully.  Disable VMX off only for cpu with bit set.
> > 
> [...]
> > +struct vmx_tdx_enabled {
> > +        cpumask_var_t vmx_enabled;
> > +        atomic_t err;
> > +};
> > +
> 
> Again (and again), why not just mimic hardware_enable_all() to use a per-cpu
> variable instead of a cpumask, so that you can get rid of this structure?

Do you mean the way __hardware_enable_nolock() uses a per-cpu variable?
Because hardware setup is a one-shot operation at initialization, we don't
want to allocate the variable statically.  Anyway, the following is a patch
that uses a dynamically allocated per-cpu variable instead of the cpumask.
Which version do you prefer?
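Just to illustrate the shape of the change before the diff, here is a minimal,
self-contained sketch of the dynamic per-cpu pattern.  The names here
(struct cpu_enable_state, my_enable_cpu(), enable_all_cpus()) are made-up
stand-ins for illustration only, not the real KVM/VMX symbols; the actual
change is the diff that follows.

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/smp.h>

struct cpu_enable_state {
        bool enabled;   /* enable callback succeeded on this CPU */
        int err;        /* error seen on this CPU, 0 if none */
};

/* Hypothetical stand-in for vmx_hardware_enable() + tdx_cpu_enable(). */
static int my_enable_cpu(void)
{
        return 0;
}

static void enable_on_cpu(void *_state)
{
        /* Runs on every CPU via on_each_cpu(); take this CPU's slot. */
        struct cpu_enable_state *state = this_cpu_ptr(_state);

        state->err = my_enable_cpu();
        if (!state->err)
                state->enabled = true;
        /* A matching disable path would test ->enabled, as vmx_off() does. */
}

static int __init enable_all_cpus(void)
{
        struct cpu_enable_state __percpu *states;
        int cpu, r = 0;

        /* One zeroed slot per CPU, allocated only for this one-shot setup. */
        states = alloc_percpu_gfp(struct cpu_enable_state,
                                  GFP_KERNEL | __GFP_ZERO);
        if (!states)
                return -ENOMEM;

        on_each_cpu(enable_on_cpu, states, true);

        /* Report the first per-CPU error, if any. */
        for_each_present_cpu(cpu) {
                struct cpu_enable_state *state = per_cpu_ptr(states, cpu);

                if (state->err) {
                        r = state->err;
                        break;
                }
        }

        free_percpu(states);
        return r;
}

The diff below applies the same pattern to vmx_tdx_on(), vmx_off() and
tdx_hardware_setup().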
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 95be6b2fba83..40a3c9c01ac6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -4511,38 +4511,34 @@ u64 tdx_non_arch_field_switch(u64 field)
         }
 }
 
 struct vmx_tdx_enabled {
-        cpumask_var_t vmx_enabled;
-        atomic_t err;
+        bool vmx_enabled;
+        int err;
 };
 
 static void __init vmx_tdx_on(void *_vmx_tdx)
 {
-        struct vmx_tdx_enabled *vmx_tdx = _vmx_tdx;
-        int r;
+        struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(_vmx_tdx);
 
-        r = vmx_hardware_enable();
-        if (!r) {
-                cpumask_set_cpu(smp_processor_id(), vmx_tdx->vmx_enabled);
-                r = tdx_cpu_enable();
+        vmx_tdx->err = vmx_hardware_enable();
+        if (!vmx_tdx->err) {
+                vmx_tdx->vmx_enabled = true;
+                vmx_tdx->err = tdx_cpu_enable();
         }
-        if (r)
-                atomic_set(&vmx_tdx->err, r);
 }
 
-static void __init vmx_off(void *_vmx_enabled)
+static void __init vmx_off(void *_vmx_tdx)
 {
-        cpumask_var_t *vmx_enabled = (cpumask_var_t *)_vmx_enabled;
+        struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(_vmx_tdx);
 
-        if (cpumask_test_cpu(smp_processor_id(), *vmx_enabled))
+        if (vmx_tdx->vmx_enabled)
                 vmx_hardware_disable();
 }
 
 int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
 {
-        struct vmx_tdx_enabled vmx_tdx = {
-                .err = ATOMIC_INIT(0),
-        };
+        struct vmx_tdx_enabled __percpu *vmx_tdx_enabled;
         int max_pkgs;
+        int cpu;
         int r = 0;
         int i;
 
@@ -4603,7 +4599,8 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
         for (i = 0; i < max_pkgs; i++)
                 mutex_init(&tdx_mng_key_config_lock[i]);
 
-        if (!zalloc_cpumask_var(&vmx_tdx.vmx_enabled, GFP_KERNEL)) {
+        vmx_tdx_enabled = alloc_percpu_gfp(struct vmx_tdx_enabled, GFP_KERNEL | __GFP_ZERO);
+        if (!vmx_tdx_enabled) {
                 r = -ENOMEM;
                 goto out;
         }
@@ -4618,15 +4615,21 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
          */
         if (!cpumask_equal(cpu_online_mask, cpu_present_mask))
                 pr_warn("The old TDX module requires all present CPUs to be online to initialize.\n");
-        on_each_cpu(vmx_tdx_on, &vmx_tdx, true);       /* TDX requires vmxon. */
-        r = atomic_read(&vmx_tdx.err);
+        on_each_cpu(vmx_tdx_on, vmx_tdx_enabled, true);        /* TDX requires vmxon. */
+        for_each_present_cpu(cpu) {
+                struct vmx_tdx_enabled *vmx_tdx = per_cpu_ptr(vmx_tdx_enabled, cpu);
+                if (vmx_tdx->err) {
+                        r = vmx_tdx->err;
+                        break;
+                }
+        }
         if (!r)
                 r = tdx_module_setup();
         else
                 r = -EIO;
-        on_each_cpu(vmx_off, &vmx_tdx.vmx_enabled, true);
+        on_each_cpu(vmx_off, vmx_tdx_enabled, true);
         cpus_read_unlock();
-        free_cpumask_var(vmx_tdx.vmx_enabled);
+        free_percpu(vmx_tdx_enabled);
         if (r)
                 goto out;
 
-- 
2.25.1

-- 
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>