On 2017/6/6 20:35, Paolo Bonzini wrote: > > > On 06/06/2017 14:30, Longpeng (Mike) wrote: >> >> >> On 2017/6/6 18:57, Paolo Bonzini wrote: >> >>> In some cases, for example involving hot-unplug of assigned >>> devices, pi_post_block can forget to remove the vCPU from the >>> blocked_vcpu_list. When this happens, the next call to >>> pi_pre_block corrupts the list. >>> >>> Fix this in two ways. First, check vcpu->pre_pcpu in pi_pre_block >>> and WARN instead of adding the element twice in the list. Second, >>> always do the list removal in pi_post_block if vcpu->pre_pcpu is >>> set (not -1). >>> >>> The new code keeps interrupts disabled for the whole duration of >>> pi_pre_block/pi_post_block. This is not strictly necessary, but >>> easier to follow. For the same reason, PI.ON is checked only >>> after the cmpxchg, and to handle it we just call the post-block >>> code. This removes duplication of the list removal code. >>> >>> Cc: Longpeng (Mike) <longpeng2@xxxxxxxxxx> >>> Cc: Huangweidong <weidong.huang@xxxxxxxxxx> >>> Cc: Gonglei <arei.gonglei@xxxxxxxxxx> >>> Cc: wangxin <wangxinxin.wang@xxxxxxxxxx> >>> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> >>> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> >>> --- >>> arch/x86/kvm/vmx.c | 62 ++++++++++++++++++++++-------------------------------- >>> 1 file changed, 25 insertions(+), 37 deletions(-) >>> >> >> >> [...] >> >> >>> @@ -11256,14 +11257,10 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) >>> } while (cmpxchg(&pi_desc->control, old.control, >>> new.control) != old.control); >>> >>> - if(vcpu->pre_pcpu != -1) { >>> - spin_lock_irqsave( >>> - &per_cpu(blocked_vcpu_on_cpu_lock, >>> - vcpu->pre_pcpu), flags); >>> + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { >>> + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >>> list_del(&vcpu->blocked_vcpu_list); >>> - spin_unlock_irqrestore( >>> - &per_cpu(blocked_vcpu_on_cpu_lock, >>> - vcpu->pre_pcpu), flags); >>> + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >> >> >> Hi Paolo, >> >> spin_lock_irqsave() will disable kernel preempt, but spin_lock() won't. is there >> some potential problems ? > > Hi, > > This function (and pi_pre_block too's part where it takes the spin lock) > runs with interrupts disabled now. > Oh, yes, please forgive my foolish. We'll continue to find why the list is corrupt when repeat poweron/shutdown Thanks. > Thanks, > > Paolo > >> Regards, >> Longpeng(Mike) >> >>> vcpu->pre_pcpu = -1; >>> } >>> } >>> @@ -11283,7 +11280,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) >>> */ >>> static int pi_pre_block(struct kvm_vcpu *vcpu) >>> { >>> - unsigned long flags; >>> unsigned int dest; >>> struct pi_desc old, new; >>> struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); >>> @@ -11293,34 +11289,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) >>> !kvm_vcpu_apicv_active(vcpu)) >>> return 0; >>> >>> - vcpu->pre_pcpu = vcpu->cpu; >>> - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, >>> - vcpu->pre_pcpu), flags); >>> - list_add_tail(&vcpu->blocked_vcpu_list, >>> - &per_cpu(blocked_vcpu_on_cpu, >>> - vcpu->pre_pcpu)); >>> - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, >>> - vcpu->pre_pcpu), flags); >>> + WARN_ON(irqs_disabled()); >>> + local_irq_disable(); >>> + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { >>> + vcpu->pre_pcpu = vcpu->cpu; >>> + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >>> + list_add_tail(&vcpu->blocked_vcpu_list, >>> + &per_cpu(blocked_vcpu_on_cpu, >>> + vcpu->pre_pcpu)); >>> + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); >>> + } >>> >>> do { >>> old.control = new.control = pi_desc->control; >>> >>> - /* >>> - * We should not block the vCPU if >>> - * an interrupt is posted for it. >>> - */ >>> - if (pi_test_on(pi_desc) == 1) { >>> - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, >>> - vcpu->pre_pcpu), flags); >>> - list_del(&vcpu->blocked_vcpu_list); >>> - spin_unlock_irqrestore( >>> - &per_cpu(blocked_vcpu_on_cpu_lock, >>> - vcpu->pre_pcpu), flags); >>> - vcpu->pre_pcpu = -1; >>> - >>> - return 1; >>> - } >>> - >>> WARN((pi_desc->sn == 1), >>> "Warning: SN field of posted-interrupts " >>> "is set before blocking\n"); >>> @@ -11345,7 +11327,12 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) >>> } while (cmpxchg(&pi_desc->control, old.control, >>> new.control) != old.control); >>> >>> - return 0; >>> + /* We should not block the vCPU if an interrupt is posted for it. */ >>> + if (pi_test_on(pi_desc) == 1) >>> + __pi_post_block(vcpu); >>> + >>> + local_irq_enable(); >>> + return (vcpu->pre_pcpu == -1); >>> } >>> >>> static int vmx_pre_block(struct kvm_vcpu *vcpu) >>> @@ -11361,12 +11348,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) >>> >>> static void pi_post_block(struct kvm_vcpu *vcpu) >>> { >>> - if (!kvm_arch_has_assigned_device(vcpu->kvm) || >>> - !irq_remapping_cap(IRQ_POSTING_CAP) || >>> - !kvm_vcpu_apicv_active(vcpu)) >>> + if (vcpu->pre_pcpu == -1) >>> return; >>> >>> + WARN_ON(irqs_disabled()); >>> + local_irq_disable(); >>> __pi_post_block(vcpu); >>> + local_irq_enable(); >>> } >>> >>> static void vmx_post_block(struct kvm_vcpu *vcpu) >> >> > > . > -- Regards, Longpeng(Mike)