On Saturday 28 February 2009 01:54:31 Marcelo Tosatti wrote: > On Fri, Feb 27, 2009 at 12:17:01PM +0800, Yang, Sheng wrote: > > On Friday 27 February 2009 07:50:54 Marcelo Tosatti wrote: > > > Can someone with HW test this please? > > > > Good catch! The patch works fine on my side. > > > > Can it be a per-device lock? One big lock for all assigned devices seems > > to restrict scalability. > > Since all state is per-device, yes. > > Can you please review, test and ack the patch below? I just checked dmesg and got this... [ 1105.343824] ------------[ cut here ]------------ [ 1105.347814] WARNING: at kernel/smp.c:226 smp_call_function_single+0x41/0x10b() [ 1105.347814] Hardware name: To Be Filled By O.E.M. [ 1105.347814] Modules linked in: kvm_intel kvm bridge stp llc i2c_dev i2c_core e1000 e1000e ehci_hcd ohci_hcd uhci_hcd [ 1105.347814] Pid: 9, comm: events/0 Tainted: G W 2.6.29-rc4-00001-gd5b5623 #20 [ 1105.347814] Call Trace: [ 1105.347814] [<ffffffff80239ddc>] warn_slowpath+0xd3/0xf2 [ 1105.347814] [<ffffffff8022ec6d>] ? __enqueue_entity+0x74/0x76 [ 1105.347814] [<ffffffff8022ed1c>] ? enqueue_entity+0xad/0xb6 [ 1105.347814] [<ffffffff80232771>] ? try_to_wake_up+0x1ff/0x211 [ 1105.347814] [<ffffffff80260061>] smp_call_function_single+0x41/0x10b [ 1105.347814] [<ffffffffa0089bc5>] kvm_vcpu_kick+0x74/0x7c [kvm] [ 1105.347814] [<ffffffffa009c559>] kvm_apic_set_irq+0x70/0x77 [kvm] [ 1105.347814] [<ffffffffa009dfa8>] kvm_set_msi+0xe8/0x10d [kvm] [ 1105.347814] [<ffffffffa00869b6>] ? kvm_assigned_dev_interrupt_work_handler+0x30/0xfd [kvm] [ 1105.347814] [<ffffffffa009e0a4>] kvm_set_irq+0x6f/0xb3 [kvm] [ 1105.347814] [<ffffffffa0086a02>] kvm_assigned_dev_interrupt_work_handler+0x7c/0xfd [kvm] [ 1105.347814] [<ffffffffa0086986>] ? kvm_assigned_dev_interrupt_work_handler+0x0/0xfd [kvm] [ 1105.347814] [<ffffffff802497ac>] run_workqueue+0xf5/0x1fd [ 1105.347814] [<ffffffff80249756>] ? 
run_workqueue+0x9f/0x1fd [ 1105.347814] [<ffffffff8024a49e>] worker_thread+0xdb/0xe8 [ 1105.347814] [<ffffffff8024d342>] ? autoremove_wake_function+0x0/0x38 [ 1105.347814] [<ffffffff8024a3c3>] ? worker_thread+0x0/0xe8 [ 1105.347814] [<ffffffff8024d231>] kthread+0x49/0x78 [ 1105.347814] [<ffffffff8020c53a>] child_rip+0xa/0x20 [ 1105.347814] [<ffffffff8020bf3c>] ? restore_args+0x0/0x30 [ 1105.347814] [<ffffffff802342c2>] ? finish_task_switch+0x0/0xf3 [ 1105.347814] [<ffffffff8024d1e8>] ? kthread+0x0/0x78 [ 1105.347814] [<ffffffff8020c530>] ? child_rip+0x0/0x20 [ 1105.347814] ---[ end trace 3b3fe301343db608 ]--- -- regards Yang, Sheng > Thanks. > > > > ----- > > > > > > kvm_assigned_dev_ack_irq is vulnerable to a race condition with the > > > interrupt handler function. It does: > > > > > > if (dev->host_irq_disabled) { > > > enable_irq(dev->host_irq); > > > dev->host_irq_disabled = false; > > > } > > > > > > If an interrupt triggers before the dev->host_irq_disabled assignment, > > > it will disable the interrupt and set dev->host_irq_disabled to true. > > > > > > On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to > > > false, and the next kvm_assigned_dev_ack_irq call will fail to reenable > > > it. > > > > > > Other than that, having the interrupt handler and work handlers run in > > > parallel sounds like asking for trouble (could not spot any obvious > > > problem, but better not have to, it's fragile). > > > > Well, my original purpose is a FIFO between interrupt handler and > > work (for MSI-X), but seems too complex... And I also don't see any > > problem for now... 
> > > > -- > > regards > > Yang, Sheng > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 3832243..16bf60b 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -349,6 +349,7 @@ struct kvm_assigned_dev_kernel { > int flags; > struct pci_dev *dev; > struct kvm *kvm; > + spinlock_t assigned_dev_lock; > }; > > struct kvm_irq_mask_notifier { > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 4d2be16..b7d060f 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -41,6 +41,7 @@ > #include <linux/pagemap.h> > #include <linux/mman.h> > #include <linux/swap.h> > +#include <linux/spinlock.h> > > #include <asm/processor.h> > #include <asm/io.h> > @@ -132,6 +133,7 @@ static void > kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * > finer-grained lock, update this > */ > mutex_lock(&kvm->lock); > + spin_lock_irq(&assigned_dev->assigned_dev_lock); > if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) { > struct kvm_guest_msix_entry *guest_entries = > assigned_dev->guest_msix_entries; > @@ -158,18 +160,21 @@ static void > kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) } > } > > + spin_unlock_irq(&assigned_dev->assigned_dev_lock); > mutex_unlock(&assigned_dev->kvm->lock); > } > > static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) > { > + unsigned long flags; > struct kvm_assigned_dev_kernel *assigned_dev = > (struct kvm_assigned_dev_kernel *) dev_id; > > + spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); > if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) { > int index = find_index_from_host_irq(assigned_dev, irq); > if (index < 0) > - return IRQ_HANDLED; > + goto out; > assigned_dev->guest_msix_entries[index].flags |= > KVM_ASSIGNED_MSIX_PENDING; > } > @@ -179,6 +184,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void > *dev_id) disable_irq_nosync(irq); > assigned_dev->host_irq_disabled = true; > > +out: > + 
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); > return IRQ_HANDLED; > } > > @@ -186,6 +193,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void > *dev_id) static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier > *kian) { > struct kvm_assigned_dev_kernel *dev; > + unsigned long flags; > > if (kian->gsi == -1) > return; > @@ -198,10 +206,12 @@ static void kvm_assigned_dev_ack_irq(struct > kvm_irq_ack_notifier *kian) /* The guest irq may be shared so this ack may > be > * from another device. > */ > + spin_lock_irqsave(&dev->assigned_dev_lock, flags); > if (dev->host_irq_disabled) { > enable_irq(dev->host_irq); > dev->host_irq_disabled = false; > } > + spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); > } > > /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ > @@ -604,6 +614,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, > match->dev = dev; > match->irq_source_id = -1; > match->kvm = kvm; > + spin_lock_init(&match->assigned_dev_lock); > > list_add(&match->list, &kvm->arch.assigned_dev_head); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html