On 06/06/2017 14:30, Longpeng (Mike) wrote: > > > On 2017/6/6 18:57, Paolo Bonzini wrote: > >> In some cases, for example involving hot-unplug of assigned >> devices, pi_post_block can forget to remove the vCPU from the >> blocked_vcpu_list. When this happens, the next call to >> pi_pre_block corrupts the list. >> >> Fix this in two ways. First, check vcpu->pre_pcpu in pi_pre_block >> and WARN instead of adding the element twice in the list. Second, >> always do the list removal in pi_post_block if vcpu->pre_pcpu is >> set (not -1). >> >> The new code keeps interrupts disabled for the whole duration of >> pi_pre_block/pi_post_block. This is not strictly necessary, but >> easier to follow. For the same reason, PI.ON is checked only >> after the cmpxchg, and to handle it we just call the post-block >> code. This removes duplication of the list removal code. >> >> Cc: Longpeng (Mike) <longpeng2@xxxxxxxxxx> >> Cc: Huangweidong <weidong.huang@xxxxxxxxxx> >> Cc: Gonglei <arei.gonglei@xxxxxxxxxx> >> Cc: wangxin <wangxinxin.wang@xxxxxxxxxx> >> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> >> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >> --- >> arch/x86/kvm/vmx.c | 62 ++++++++++++++++++++++-------------------------------- >> 1 file changed, 25 insertions(+), 37 deletions(-) >> > > > [...] > > >> @@ -11256,14 +11257,10 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) >> } while (cmpxchg(&pi_desc->control, old.control, >> new.control) != old.control); >> >> - if(vcpu->pre_pcpu != -1) { >> - spin_lock_irqsave( >> - &per_cpu(blocked_vcpu_on_cpu_lock, >> - vcpu->pre_pcpu), flags); >> + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { >> + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >> list_del(&vcpu->blocked_vcpu_list); >> - spin_unlock_irqrestore( >> - &per_cpu(blocked_vcpu_on_cpu_lock, >> - vcpu->pre_pcpu), flags); >> + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); > > > Hi Paolo, > > spin_lock_irqsave() will disable kernel preempt, but spin_lock() won't. is there > some potential problems ? Hi, This function (and pi_pre_block too's part where it takes the spin lock) runs with interrupts disabled now. Thanks, Paolo > Regards, > Longpeng(Mike) > >> vcpu->pre_pcpu = -1; >> } >> } >> @@ -11283,7 +11280,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) >> */ >> static int pi_pre_block(struct kvm_vcpu *vcpu) >> { >> - unsigned long flags; >> unsigned int dest; >> struct pi_desc old, new; >> struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); >> @@ -11293,34 +11289,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) >> !kvm_vcpu_apicv_active(vcpu)) >> return 0; >> >> - vcpu->pre_pcpu = vcpu->cpu; >> - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, >> - vcpu->pre_pcpu), flags); >> - list_add_tail(&vcpu->blocked_vcpu_list, >> - &per_cpu(blocked_vcpu_on_cpu, >> - vcpu->pre_pcpu)); >> - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, >> - vcpu->pre_pcpu), flags); >> + WARN_ON(irqs_disabled()); >> + local_irq_disable(); >> + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { >> + vcpu->pre_pcpu = vcpu->cpu; >> + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >> + list_add_tail(&vcpu->blocked_vcpu_list, >> + &per_cpu(blocked_vcpu_on_cpu, >> + vcpu->pre_pcpu)); >> + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >> + } >> >> do { >> old.control = new.control = pi_desc->control; >> >> - /* >> - * We should not block the vCPU if >> - * an interrupt is posted for it. >> - */ >> - if (pi_test_on(pi_desc) == 1) { >> - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, >> - vcpu->pre_pcpu), flags); >> - list_del(&vcpu->blocked_vcpu_list); >> - spin_unlock_irqrestore( >> - &per_cpu(blocked_vcpu_on_cpu_lock, >> - vcpu->pre_pcpu), flags); >> - vcpu->pre_pcpu = -1; >> - >> - return 1; >> - } >> - >> WARN((pi_desc->sn == 1), >> "Warning: SN field of posted-interrupts " >> "is set before blocking\n"); >> @@ -11345,7 +11327,12 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) >> } while (cmpxchg(&pi_desc->control, old.control, >> new.control) != old.control); >> >> - return 0; >> + /* We should not block the vCPU if an interrupt is posted for it. */ >> + if (pi_test_on(pi_desc) == 1) >> + __pi_post_block(vcpu); >> + >> + local_irq_enable(); >> + return (vcpu->pre_pcpu == -1); >> } >> >> static int vmx_pre_block(struct kvm_vcpu *vcpu) >> @@ -11361,12 +11348,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) >> >> static void pi_post_block(struct kvm_vcpu *vcpu) >> { >> - if (!kvm_arch_has_assigned_device(vcpu->kvm) || >> - !irq_remapping_cap(IRQ_POSTING_CAP) || >> - !kvm_vcpu_apicv_active(vcpu)) >> + if (vcpu->pre_pcpu == -1) >> return; >> >> + WARN_ON(irqs_disabled()); >> + local_irq_disable(); >> __pi_post_block(vcpu); >> + local_irq_enable(); >> } >> >> static void vmx_post_block(struct kvm_vcpu *vcpu) > >