On Tue, Jul 17, 2012 at 07:14:52PM +0300, Michael S. Tsirkin wrote: > > _Seems_ racy, or _is_ racy? Please identify the race. > > Look at this: > > static inline int kvm_irq_line_state(unsigned long *irq_state, > int irq_source_id, int level) > { > /* Logical OR for level trig interrupt */ > if (level) > set_bit(irq_source_id, irq_state); > else > clear_bit(irq_source_id, irq_state); > > return !!(*irq_state); > } > > > Now: > If other CPU changes some other bit after the atomic change, > it looks like !!(*irq_state) might return a stale value. > > CPU 0 clears bit 0. CPU 1 sets bit 1. CPU 1 sets level to 1. > If CPU 0 sees a stale value now it will return 0 here > and interrupt will get cleared. > This will hardly happen on x86 especially since bit is set with serialized instruction. But there is actually a race here. CPU 0 clears bit 0. CPU 0 read irq_state as 0. CPU 1 sets level to 1. CPU 1 calls kvm_ioapic_set_irq(1). CPU 0 calls kvm_ioapic_set_irq(0). No ioapic thinks the level is 0 but irq_state is not 0. This untested and un-compiled patch should fix it. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ef91d79..e22c78b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -825,7 +825,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); -int kvm_pic_set_irq(void *opaque, int irq, int level); +int kvm_pic_set_irq(void *opaque, int irq); void kvm_inject_nmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 81cf4fa..0d6988f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -188,12 +188,13 @@ void kvm_pic_update_irq(struct kvm_pic *s) pic_unlock(s); } -int kvm_pic_set_irq(void *opaque, int irq, int level) +int kvm_pic_set_irq(void *opaque, int irq) { struct kvm_pic *s = opaque; - int ret = -1; + int ret = -1, level; pic_lock(s); + level = !!s->irq_states[irq]; if (irq >= 0 && irq < PIC_NUM_PINS) { ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 26fd54d..6ad6a6b 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -191,14 +191,15 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); } -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq) { u32 old_irr; u32 mask = 1 << irq; union kvm_ioapic_redirect_entry entry; - int ret = 1; + int ret = 1, level; spin_lock(&ioapic->lock); + level = !!ioapic->irq_states[irq]; old_irr = ioapic->irr; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 32872a0..65894dd 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -74,7 +74,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a6a0365..db0ccef 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -33,7 +33,7 @@ #include "ioapic.h" -static inline int kvm_irq_line_state(unsigned long *irq_state, +static inline void kvm_irq_line_state(unsigned long *irq_state, int irq_source_id, int level) { /* Logical OR for level trig interrupt */ @@ -41,8 +41,6 @@ static inline int kvm_irq_line_state(unsigned long *irq_state, set_bit(irq_source_id, irq_state); else clear_bit(irq_source_id, irq_state); - - return !!(*irq_state); } static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, @@ -50,9 +48,9 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, { #ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); - level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], + kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], irq_source_id, level); - return kvm_pic_set_irq(pic, e->irqchip.pin, level); + return kvm_pic_set_irq(pic, e->irqchip.pin); #else return -1; #endif @@ -62,10 +60,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], + kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], irq_source_id, level); - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html