On Wed, Jun 14, 2023 at 09:41:58AM +0000,
"Huang, Kai" <kai.huang@xxxxxxxxx> wrote:
> On Tue, 2023-06-13 at 10:38 -0700, Isaku Yamahata wrote:
> > On Mon, Jun 12, 2023 at 11:55:14PM +0000,
> > "Huang, Kai" <kai.huang@xxxxxxxxx> wrote:
> > 
> > > On Wed, 2023-06-07 at 11:06 -0700, Isaku Yamahata wrote:
> > > > Thanks for pointing it out. The following is the fix.
> > > > 
> > > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > > > index 8a1d0755d275..b0d3f646afb1 100644
> > > > --- a/arch/x86/kvm/vmx/tdx.c
> > > > +++ b/arch/x86/kvm/vmx/tdx.c
> > > > @@ -4499,26 +4499,39 @@ u64 tdx_non_arch_field_switch(u64 field)
> > > >          }
> > > >  }
> > > > 
> > > > -static void __init vmx_tdx_on(void *info)
> > > > +struct vmx_tdx_enabled {
> > > > +        cpumask_var_t vmx_enabled;
> > > > +        atomic_t *err;
> > > > +};
> > > > +
> > > 
> > > Sorry for late reply.
> > > 
> > > I think you just need to mimic hardware_enable_all() -- using a per-cpu
> > > variable.  In this way you can get rid of this structure.
> > > 
> > > But again, we have listed a couple of options in the v13 discussion [1]:
> > > 
> > > 1) Call kvm_ops_update() twice before and after hardware_setup() in order
> > > to use hardware_enable_all() directly.
> > > 
> > > 2) Expose kvm_x86_ops as symbol so VMX can set hardware_{enable|disable}()
> > > callback before hardware_setup() in order to use hardware_enable_all().
> > > 
> > > 3) Implement VMX's own hardware_enable_all() logic as shown in this patch.
> > > 
> > > 4) ???
> > > 
> > > I think it would be better if Sean can provide some comments here, but
> > > until he does, we can keep using option 3) (this patch).
> > > 
> > > [1] https://lore.kernel.org/lkml/5dc84a2601a47ccc29ef43200cf3ec0d1b485d23.camel@xxxxxxxxx/
> > 
> > Ok, makes sense. Here is the updated version with the fix for the error you
> > pointed out.  Introduce cpu bitmap to track which cpu enable VMX(VMXON)
> > successfully.  Disable VMX off only for cpu with bit set.
> > 
> [...]
> > +struct vmx_tdx_enabled {
> > +        cpumask_var_t vmx_enabled;
> > +        atomic_t err;
> > +};
> > +
> 
> Again (and again), why not just mimic hardware_enable_all() to use a per-cpu
> variable instead of a cpumask, so that you can get rid of this structure?

Do you mean the way __hardware_enable_nolock() uses a per-cpu variable?
Because hardware setup is a one-shot operation at initialization, we don't
want to allocate the variable statically.  Anyway, the following is a patch
that uses a dynamically allocated per-cpu variable instead of the cpumask.
Which version do you prefer?
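Just to illustrate the shape of the change before the diff, here is a minimal,
self-contained sketch of the dynamic per-cpu pattern.  The names here
(struct cpu_enable_state, my_enable_cpu(), enable_all_cpus()) are made-up
stand-ins for illustration only, not the real KVM/VMX symbols; the actual
change is the diff that follows.

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/smp.h>

struct cpu_enable_state {
        bool enabled;   /* enable callback succeeded on this CPU */
        int err;        /* error seen on this CPU, 0 if none */
};

/* Hypothetical stand-in for vmx_hardware_enable() + tdx_cpu_enable(). */
static int my_enable_cpu(void)
{
        return 0;
}

static void enable_on_cpu(void *_state)
{
        /* Runs on every CPU via on_each_cpu(); take this CPU's slot. */
        struct cpu_enable_state *state = this_cpu_ptr(_state);

        state->err = my_enable_cpu();
        if (!state->err)
                state->enabled = true;
        /* A matching disable path would test ->enabled, as vmx_off() does. */
}

static int __init enable_all_cpus(void)
{
        struct cpu_enable_state __percpu *states;
        int cpu, r = 0;

        /* One zeroed slot per CPU, allocated only for this one-shot setup. */
        states = alloc_percpu_gfp(struct cpu_enable_state,
                                  GFP_KERNEL | __GFP_ZERO);
        if (!states)
                return -ENOMEM;

        on_each_cpu(enable_on_cpu, states, true);

        /* Report the first per-CPU error, if any. */
        for_each_present_cpu(cpu) {
                struct cpu_enable_state *state = per_cpu_ptr(states, cpu);

                if (state->err) {
                        r = state->err;
                        break;
                }
        }

        free_percpu(states);
        return r;
}

The diff below applies the same pattern to vmx_tdx_on(), vmx_off() and
tdx_hardware_setup().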
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 95be6b2fba83..40a3c9c01ac6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -4511,38 +4511,34 @@ u64 tdx_non_arch_field_switch(u64 field)
         }
 }
 
 struct vmx_tdx_enabled {
-        cpumask_var_t vmx_enabled;
-        atomic_t err;
+        bool vmx_enabled;
+        int err;
 };
 
 static void __init vmx_tdx_on(void *_vmx_tdx)
 {
-        struct vmx_tdx_enabled *vmx_tdx = _vmx_tdx;
-        int r;
+        struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(_vmx_tdx);
 
-        r = vmx_hardware_enable();
-        if (!r) {
-                cpumask_set_cpu(smp_processor_id(), vmx_tdx->vmx_enabled);
-                r = tdx_cpu_enable();
+        vmx_tdx->err = vmx_hardware_enable();
+        if (!vmx_tdx->err) {
+                vmx_tdx->vmx_enabled = true;
+                vmx_tdx->err = tdx_cpu_enable();
         }
-        if (r)
-                atomic_set(&vmx_tdx->err, r);
 }
 
-static void __init vmx_off(void *_vmx_enabled)
+static void __init vmx_off(void *_vmx_tdx)
 {
-        cpumask_var_t *vmx_enabled = (cpumask_var_t *)_vmx_enabled;
+        struct vmx_tdx_enabled *vmx_tdx = this_cpu_ptr(_vmx_tdx);
 
-        if (cpumask_test_cpu(smp_processor_id(), *vmx_enabled))
+        if (vmx_tdx->vmx_enabled)
                 vmx_hardware_disable();
 }
 
 int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
 {
-        struct vmx_tdx_enabled vmx_tdx = {
-                .err = ATOMIC_INIT(0),
-        };
+        struct vmx_tdx_enabled __percpu *vmx_tdx_enabled;
         int max_pkgs;
+        int cpu;
         int r = 0;
         int i;
 
@@ -4603,7 +4599,8 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
         for (i = 0; i < max_pkgs; i++)
                 mutex_init(&tdx_mng_key_config_lock[i]);
 
-        if (!zalloc_cpumask_var(&vmx_tdx.vmx_enabled, GFP_KERNEL)) {
+        vmx_tdx_enabled = alloc_percpu_gfp(struct vmx_tdx_enabled, GFP_KERNEL | __GFP_ZERO);
+        if (!vmx_tdx_enabled) {
                 r = -ENOMEM;
                 goto out;
         }
@@ -4618,15 +4615,21 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
          */
         if (!cpumask_equal(cpu_online_mask, cpu_present_mask))
                 pr_warn("The old TDX module requires all present CPUs to be online to initialize.\n");
-        on_each_cpu(vmx_tdx_on, &vmx_tdx, true);       /* TDX requires vmxon. */
-        r = atomic_read(&vmx_tdx.err);
+        on_each_cpu(vmx_tdx_on, vmx_tdx_enabled, true);        /* TDX requires vmxon. */
+        for_each_present_cpu(cpu) {
+                struct vmx_tdx_enabled *vmx_tdx = per_cpu_ptr(vmx_tdx_enabled, cpu);
+                if (vmx_tdx->err) {
+                        r = vmx_tdx->err;
+                        break;
+                }
+        }
         if (!r)
                 r = tdx_module_setup();
         else
                 r = -EIO;
-        on_each_cpu(vmx_off, &vmx_tdx.vmx_enabled, true);
+        on_each_cpu(vmx_off, vmx_tdx_enabled, true);
         cpus_read_unlock();
-        free_cpumask_var(vmx_tdx.vmx_enabled);
+        free_percpu(vmx_tdx_enabled);
         if (r)
                 goto out;
 
-- 
2.25.1

-- 
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>