On Tue, Sep 10, 2024 at 12:45:07PM +0200, Paolo Bonzini wrote:
> On 9/4/24 05:07, Rick Edgecombe wrote:
> > +/*
> > + * A per-CPU list of TD vCPUs associated with a given CPU. Used when a CPU
> > + * is brought down to invoke TDH_VP_FLUSH on the appropriate TD vCPUS.
>
> ... or when a vCPU is migrated.
>
> > + * Protected by interrupt mask. This list is manipulated in process context
> > + * of vCPU and IPI callback. See tdx_flush_vp_on_cpu().
> > + */
> > +static DEFINE_PER_CPU(struct list_head, associated_tdvcpus);
>
> It may be a bit more modern, or cleaner, to use a local_lock here instead of
> just relying on local_irq_disable/enable.
Hi Paolo,

After converting local_irq_disable/enable to local_lock (as in the fixup
patch at the bottom), lockdep reported "BUG: Invalid wait context" in the
kvm_shutdown path.

This is because local_lock_irqsave() internally acquires a spinlock that is
not a raw_spin_lock, and is therefore regarded by lockdep as sleepable in
the atomic context introduced by on_each_cpu() in kvm_shutdown():

kvm_shutdown
  |->on_each_cpu(__kvm_disable_virtualization, NULL, 1);
       __kvm_disable_virtualization
         kvm_arch_hardware_disable
           tdx_hardware_disable
             local_lock_irqsave

Given that
(1) tdx_hardware_disable() is called per-cpu and only manipulates the
    per-cpu list of its running cpu;
(2) tdx_vcpu_load() also only updates the per-cpu list of its running cpu,
do you think we can keep just using local_irq_disable/enable?

We can add a KVM_BUG_ON() in tdx_vcpu_load() to ensure (2):
KVM_BUG_ON(cpu != raw_smp_processor_id(), vcpu->kvm);

Or do you still prefer a per-vcpu raw_spin_lock + local_irq_disable/enable?
(A rough sketch of that alternative is appended after the fixup patch below.)

Thanks
Yan

+struct associated_tdvcpus {
+	struct list_head list;
+	local_lock_t lock;
+};
+
 /*
  * A per-CPU list of TD vCPUs associated with a given CPU. Used when a CPU
  * is brought down to invoke TDH_VP_FLUSH on the appropriate TD vCPUS.
- * Protected by interrupt mask. This list is manipulated in process context
+ * Protected by local lock. This list is manipulated in process context
  * of vCPU and IPI callback. See tdx_flush_vp_on_cpu().
  */
-static DEFINE_PER_CPU(struct list_head, associated_tdvcpus);
+static DEFINE_PER_CPU(struct associated_tdvcpus, associated_tdvcpus);
 
 static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
 {
@@ -338,19 +344,18 @@ static void tdx_flush_vp_on_cpu(struct kvm_vcpu *vcpu)
 
 void tdx_hardware_disable(void)
 {
-	int cpu = raw_smp_processor_id();
-	struct list_head *tdvcpus = &per_cpu(associated_tdvcpus, cpu);
+	struct list_head *tdvcpus = this_cpu_ptr(&associated_tdvcpus.list);
 	struct tdx_flush_vp_arg arg;
 	struct vcpu_tdx *tdx, *tmp;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&associated_tdvcpus.lock, flags);
 	/* Safe variant needed as tdx_disassociate_vp() deletes the entry. */
 	list_for_each_entry_safe(tdx, tmp, tdvcpus, cpu_list) {
 		arg.vcpu = &tdx->vcpu;
 		tdx_flush_vp(&arg);
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&associated_tdvcpus.lock, flags);
 }
 
 static void smp_func_do_phymem_cache_wb(void *unused)
@@ -609,15 +614,16 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	tdx_flush_vp_on_cpu(vcpu);
 
-	local_irq_disable();
+	KVM_BUG_ON(cpu != raw_smp_processor_id(), vcpu->kvm);
+	local_lock_irq(&associated_tdvcpus.lock);
 	/*
 	 * Pairs with the smp_wmb() in tdx_disassociate_vp() to ensure
 	 * vcpu->cpu is read before tdx->cpu_list.
 	 */
 	smp_rmb();
 
-	list_add(&tdx->cpu_list, &per_cpu(associated_tdvcpus, cpu));
-	local_irq_enable();
+	list_add(&tdx->cpu_list, this_cpu_ptr(&associated_tdvcpus.list));
+	local_unlock_irq(&associated_tdvcpus.lock);
 }
 
 void tdx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -2091,8 +2097,10 @@ static int __init __tdx_bringup(void)
 	}
 
 	/* tdx_hardware_disable() uses associated_tdvcpus. */
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(associated_tdvcpus, i));
+	for_each_possible_cpu(i) {
+		INIT_LIST_HEAD(&per_cpu(associated_tdvcpus.list, i));
+		local_lock_init(&per_cpu(associated_tdvcpus.lock, i));
+	}
 
 	/*
 	 * Enabling TDX requires enabling hardware virtualization first,
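
For reference, here is a rough sketch of the per-vcpu raw_spin_lock +
local_irq_disable/enable alternative asked about above. It only illustrates
the idea: the cpu_list_lock field and the tdx_vcpu_load_assoc() helper are
made-up names for this sketch (the lock would also need raw_spin_lock_init()
at vCPU creation), not something from the series.

/* Hypothetical sketch, not the actual fixup. */
struct vcpu_tdx {
	/* ... existing fields ... */
	struct list_head cpu_list;
	raw_spinlock_t cpu_list_lock;	/* assumed name, guards cpu_list */
};

/* What the association step in tdx_vcpu_load() could look like. */
static void tdx_vcpu_load_assoc(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_tdx *tdx = to_tdx(vcpu);

	local_irq_disable();
	/* Pairs with the smp_wmb() in tdx_disassociate_vp(). */
	smp_rmb();
	raw_spin_lock(&tdx->cpu_list_lock);
	list_add(&tdx->cpu_list, &per_cpu(associated_tdvcpus, cpu));
	raw_spin_unlock(&tdx->cpu_list_lock);
	local_irq_enable();
}

On the disable side, tdx_hardware_disable() would keep local_irq_save/restore
and take each vCPU's cpu_list_lock around the flush/list_del. Since a
raw_spinlock is not treated as sleepable by lockdep, the on_each_cpu() path
in kvm_shutdown() would not trigger the "Invalid wait context" splat.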