On Thu, Sep 01, 2022 at 07:17:45PM -0700, isaku.yamahata@xxxxxxxxx wrote: > From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > Because kvm_count_lock unnecessarily complicates the KVM locking convention > Drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock for > simplicity. > > Opportunistically add some comments on locking. > > Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> > Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > --- > Documentation/virt/kvm/locking.rst | 14 +++++------- > virt/kvm/kvm_main.c | 34 ++++++++++++++++++++---------- > 2 files changed, 28 insertions(+), 20 deletions(-) > > diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst > index 845a561629f1..8957e32aa724 100644 > --- a/Documentation/virt/kvm/locking.rst > +++ b/Documentation/virt/kvm/locking.rst > @@ -216,15 +216,11 @@ time it will be set using the Dirty tracking mechanism described above. > :Type: mutex > :Arch: any > :Protects: - vm_list > - > -``kvm_count_lock`` > -^^^^^^^^^^^^^^^^^^ > - > -:Type: raw_spinlock_t > -:Arch: any > -:Protects: - hardware virtualization enable/disable > -:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt > - migration. > + - kvm_usage_count > + - hardware virtualization enable/disable > +:Comment: Use cpus_read_lock() for hardware virtualization enable/disable > + because hardware enabling/disabling must be atomic /wrt > + migration. The lock order is cpus lock => kvm_lock. 
> > ``kvm->mn_invalidate_lock`` > ^^^^^^^^^^^^^^^^^^^^^^^^^^^ > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index fc55447c4dba..082d5dbc8d7f 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -100,7 +100,6 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); > */ > > DEFINE_MUTEX(kvm_lock); > -static DEFINE_RAW_SPINLOCK(kvm_count_lock); > LIST_HEAD(vm_list); > > static cpumask_var_t cpus_hardware_enabled; > @@ -4996,6 +4995,8 @@ static void hardware_enable_nolock(void *caller_name) > int cpu = raw_smp_processor_id(); > int r; > > + WARN_ON_ONCE(preemptible()); This looks incorrect; it may trigger every time a CPU is onlined. Because patch 7 moved CPUHP_AP_KVM_STARTING *AFTER* CPUHP_AP_ONLINE_IDLE as CPUHP_AP_KVM_ONLINE, cpuhp_thread_fun() now runs the new CPUHP_AP_KVM_ONLINE in *non-atomic* context: cpuhp_thread_fun(unsigned int cpu) { ... if (cpuhp_is_atomic_state(state)) { local_irq_disable(); st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); local_irq_enable(); WARN_ON_ONCE(st->result); } else { st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); } ... } static bool cpuhp_is_atomic_state(enum cpuhp_state state) { return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } hardware_enable_nolock() is now called in 2 cases: 1. In atomic context, by on_each_cpu(). 2. From non-atomic context, by the CPU hotplug thread. So how about "WARN_ONCE(preemptible() && cpu_active(cpu))"? > + > if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) > return; > > @@ -5019,7 +5020,7 @@ static int kvm_online_cpu(unsigned int cpu) > if (ret) > return ret; > > - raw_spin_lock(&kvm_count_lock); > + mutex_lock(&kvm_lock); > /* > * Abort the CPU online process if hardware virtualization cannot > * be enabled. 
Otherwise running VMs would encounter unrecoverable > @@ -5034,7 +5035,7 @@ static int kvm_online_cpu(unsigned int cpu) > ret = -EIO; > } > } > - raw_spin_unlock(&kvm_count_lock); > + mutex_unlock(&kvm_lock); > return ret; > } > > @@ -5042,6 +5043,8 @@ static void hardware_disable_nolock(void *junk) > { > int cpu = raw_smp_processor_id(); > > + WARN_ON_ONCE(preemptible()); Ditto. > + > if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) > return; > cpumask_clear_cpu(cpu, cpus_hardware_enabled); > @@ -5050,10 +5053,10 @@ static void hardware_disable_nolock(void *junk) > > static int kvm_offline_cpu(unsigned int cpu) > { > - raw_spin_lock(&kvm_count_lock); > + mutex_lock(&kvm_lock); > if (kvm_usage_count) > hardware_disable_nolock(NULL); > - raw_spin_unlock(&kvm_count_lock); > + mutex_unlock(&kvm_lock); > return 0; > } > > @@ -5068,9 +5071,11 @@ static void hardware_disable_all_nolock(void) > > static void hardware_disable_all(void) > { > - raw_spin_lock(&kvm_count_lock); > + cpus_read_lock(); > + mutex_lock(&kvm_lock); > hardware_disable_all_nolock(); > - raw_spin_unlock(&kvm_count_lock); > + mutex_unlock(&kvm_lock); > + cpus_read_unlock(); > } > > static int hardware_enable_all(void) > @@ -5088,7 +5093,7 @@ static int hardware_enable_all(void) > * Disable CPU hotplug to prevent this case from happening. > */ > cpus_read_lock(); > - raw_spin_lock(&kvm_count_lock); > + mutex_lock(&kvm_lock); > > kvm_usage_count++; > if (kvm_usage_count == 1) { > @@ -5101,7 +5106,7 @@ static int hardware_enable_all(void) > } > } > > - raw_spin_unlock(&kvm_count_lock); > + mutex_unlock(&kvm_lock); > cpus_read_unlock(); > > return r; > @@ -5708,8 +5713,15 @@ static void kvm_init_debug(void) > > static int kvm_suspend(void) > { > - if (kvm_usage_count) > + /* > + * The caller ensures that CPU hotlug is disabled by > + * cpu_hotplug_disable() and other CPUs are offlined. No need for > + * locking. 
> + */ > + if (kvm_usage_count) { > + lockdep_assert_not_held(&kvm_lock); > hardware_disable_nolock(NULL); > + } > return 0; > } > > @@ -5723,7 +5735,7 @@ static void kvm_resume(void) > return; /* FIXME: disable KVM */ > > if (kvm_usage_count) { > - lockdep_assert_not_held(&kvm_count_lock); > + lockdep_assert_not_held(&kvm_lock); > hardware_enable_nolock((void *)__func__); > } > } > -- > 2.25.1 >