This is a note to let you know that I've just added the patch titled KVM: VMX: avoid double list add with VT-d posted interrupts to the 4.13-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch and it can be found in the queue-4.13 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 8b306e2f3c41939ea528e6174c88cfbfff893ce1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@xxxxxxxxxx> Date: Tue, 6 Jun 2017 12:57:05 +0200 Subject: KVM: VMX: avoid double list add with VT-d posted interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Paolo Bonzini <pbonzini@xxxxxxxxxx> commit 8b306e2f3c41939ea528e6174c88cfbfff893ce1 upstream. In some cases, for example involving hot-unplug of assigned devices, pi_post_block can forget to remove the vCPU from the blocked_vcpu_list. When this happens, the next call to pi_pre_block corrupts the list. Fix this in two ways. First, check vcpu->pre_pcpu in pi_pre_block and WARN instead of adding the element twice in the list. Second, always do the list removal in pi_post_block if vcpu->pre_pcpu is set (not -1). The new code keeps interrupts disabled for the whole duration of pi_pre_block/pi_post_block. This is not strictly necessary, but easier to follow. For the same reason, PI.ON is checked only after the cmpxchg, and to handle it we just call the post-block code. This removes duplication of the list removal code. Cc: Huangweidong <weidong.huang@xxxxxxxxxx> Cc: Gonglei <arei.gonglei@xxxxxxxxxx> Cc: wangxin <wangxinxin.wang@xxxxxxxxxx> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> Tested-by: Longpeng (Mike) <longpeng2@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/kvm/vmx.c | 62 +++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11394,10 +11394,11 @@ static void __pi_post_block(struct kvm_v struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); struct pi_desc old, new; unsigned int dest; - unsigned long flags; do { old.control = new.control = pi_desc->control; + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the VCPU is blocked\n"); dest = cpu_physical_id(vcpu->cpu); @@ -11414,14 +11415,10 @@ static void __pi_post_block(struct kvm_v } while (cmpxchg(&pi_desc->control, old.control, new.control) != old.control); - if(vcpu->pre_pcpu != -1) { - spin_lock_irqsave( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); vcpu->pre_pcpu = -1; } } @@ -11441,7 +11438,6 @@ static void __pi_post_block(struct kvm_v */ static int pi_pre_block(struct kvm_vcpu *vcpu) { - unsigned long flags; unsigned int dest; struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -11451,34 +11447,20 @@ static int pi_pre_block(struct kvm_vcpu !kvm_vcpu_apicv_active(vcpu)) return 0; - vcpu->pre_pcpu = vcpu->cpu; - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + WARN_ON(irqs_disabled()); + local_irq_disable(); + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { + vcpu->pre_pcpu = vcpu->cpu; + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + } do { old.control = new.control = pi_desc->control; - /* - * We should not block the vCPU if - * an interrupt is posted for it. - */ - if (pi_test_on(pi_desc) == 1) { - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - - return 1; - } - WARN((pi_desc->sn == 1), "Warning: SN field of posted-interrupts " "is set before blocking\n"); @@ -11503,7 +11485,12 @@ static int pi_pre_block(struct kvm_vcpu } while (cmpxchg(&pi_desc->control, old.control, new.control) != old.control); - return 0; + /* We should not block the vCPU if an interrupt is posted for it. */ + if (pi_test_on(pi_desc) == 1) + __pi_post_block(vcpu); + + local_irq_enable(); + return (vcpu->pre_pcpu == -1); } static int vmx_pre_block(struct kvm_vcpu *vcpu) @@ -11519,12 +11506,13 @@ static int vmx_pre_block(struct kvm_vcpu static void pi_post_block(struct kvm_vcpu *vcpu) { - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (vcpu->pre_pcpu == -1) return; + WARN_ON(irqs_disabled()); + local_irq_disable(); __pi_post_block(vcpu); + local_irq_enable(); } static void vmx_post_block(struct kvm_vcpu *vcpu) Patches currently in stable-queue which might be from pbonzini@xxxxxxxxxx are queue-4.13/kvm-vmx-simplify-and-fix-vmx_vcpu_pi_load.patch queue-4.13/kvm-vmx-avoid-double-list-add-with-vt-d-posted-interrupts.patch queue-4.13/genirq-fix-cpumask-check-in-__irq_startup_managed.patch queue-4.13/kvm-nvmx-don-t-allow-l2-to-access-the-hardware-cr8.patch queue-4.13/kvm-x86-handle-async-pf-in-rcu-read-side-critical-sections.patch queue-4.13/kvm-vmx-extract-__pi_post_block.patch queue-4.13/kvm-nvmx-fix-host_cr3-host_cr4-cache.patch queue-4.13/kvm-vmx-do-not-bug-on-out-of-bounds-guest-irq.patch