Re: [PATCH v2] KVM: x86: pull kvm->srcu read-side to kvm_arch_vcpu_ioctl_run

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 19/2/2022 5:34 pm, Paolo Bonzini wrote:
kvm_arch_vcpu_ioctl_run is already doing srcu_read_lock/unlock in two
places, namely vcpu_run and post_kvm_run_save, and a third is actually
needed around the call to vcpu->arch.complete_userspace_io to avoid
the following splat:

   WARNING: suspicious RCU usage
   arch/x86/kvm/pmu.c:190 suspicious rcu_dereference_check() usage!
   other info that might help us debug this:
   rcu_scheduler_active = 2, debug_locks = 1
   1 lock held by CPU 28/KVM/370841:
   #0: ff11004089f280b8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x87/0x730 [kvm]
   Call Trace:
    <TASK>
    dump_stack_lvl+0x59/0x73
    reprogram_fixed_counter+0x15d/0x1a0 [kvm]
    kvm_pmu_trigger_event+0x1a3/0x260 [kvm]
    ? free_moved_vector+0x1b4/0x1e0
    complete_fast_pio_in+0x8a/0xd0 [kvm]

This splat is not at all unexpected, since complete_userspace_io
callbacks can execute similar code to vmexits.  For example, SVM
with nrips=false will call into the emulator from
svm_skip_emulated_instruction().

Reported-by: Like Xu <likexu@xxxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
	v2: actually commit what I tested... srcu_read_lock must be
	    before all "goto out"s.

  arch/x86/kvm/x86.c | 19 ++++++++-----------
  1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 82a9dcd8c67f..e55de9b48d1a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9180,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
  		likely(!pic_in_kernel(vcpu->kvm));
  }
+/* Called within kvm->srcu read side. */
  static void post_kvm_run_save(struct kvm_vcpu *vcpu)
  {
  	struct kvm_run *kvm_run = vcpu->run;
@@ -9188,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
  	kvm_run->cr8 = kvm_get_cr8(vcpu);
  	kvm_run->apic_base = kvm_get_apic_base(vcpu);
- /*
-	 * The call to kvm_ready_for_interrupt_injection() may end up in
-	 * kvm_xen_has_interrupt() which may require the srcu lock to be
-	 * held, to protect against changes in the vcpu_info address.
-	 */
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
  	kvm_run->ready_for_interrupt_injection =
  		pic_in_kernel(vcpu->kvm) ||
  		kvm_vcpu_ready_for_interrupt_injection(vcpu);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu))
  		kvm_run->flags |= KVM_RUN_X86_SMM;
@@ -9815,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
  EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
/*
+ * Called within kvm->srcu read side.
   * Returns 1 to let vcpu_run() continue the guest execution loop without
   * exiting to the userspace.  Otherwise, the value will be returned to the
   * userspace.
@@ -10193,6 +10188,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  	return r;
  }
+/* Called within kvm->srcu read side. */
  static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
  {
  	bool hv_timer;
@@ -10252,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
  		!vcpu->arch.apf.halted);
  }
+/* Called within kvm->srcu read side. */
  static int vcpu_run(struct kvm_vcpu *vcpu)
  {
  	int r;
  	struct kvm *kvm = vcpu->kvm;
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
  	vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
@@ -10291,8 +10287,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
  		}
  	}
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-
  	return r;
  }
@@ -10398,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  {
  	struct kvm_run *kvm_run = vcpu->run;
+	struct kvm *kvm = vcpu->kvm;
  	int r;
vcpu_load(vcpu);
@@ -10405,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  	kvm_run->flags = 0;
  	kvm_load_guest_fpu(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

With this patch (based on ec756e40e271), the kworker/140:1+rcu_gp task on the host
becomes so heavily loaded that it is not even possible to quickly boot a 2U4G VM with QEMU.

  	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
  		if (kvm_run->immediate_exit) {
  			r = -EINTR;
@@ -10475,8 +10471,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  	if (kvm_run->kvm_valid_regs)
  		store_regs(vcpu);
  	post_kvm_run_save(vcpu);
-	kvm_sigset_deactivate(vcpu);
+	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_sigset_deactivate(vcpu);
  	vcpu_put(vcpu);
  	return r;
  }



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux