On Mon, Jul 13, 2009 at 04:15:30PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:03:10PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> > > On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> > > >
> > > > Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx>
> > > > ---
> > > >  include/linux/kvm_host.h |    2 +-
> > > >  virt/kvm/irq_comm.c      |   55 +++++++++++++++++++++-------------------------
> > > >  virt/kvm/kvm_main.c      |    1 -
> > > >  3 files changed, 26 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index f54a0d3..6756b3e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -161,7 +161,7 @@ struct kvm {
> > > >
> > > >  	struct mutex irq_lock;
> > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > -	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > +	struct kvm_kernel_irq_routing_entry *irq_routing;
> > > >  	struct hlist_head mask_notifier_list;
> > > >  #endif
> > > >
> > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > index 7af18b8..b2fa3f6 100644
> > > > --- a/virt/kvm/irq_comm.c
> > > > +++ b/virt/kvm/irq_comm.c
> > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >  	 * IOAPIC. So set the bit in both. The guest will ignore
> > > >  	 * writes to the unused one.
> > > >  	 */
> > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > +	rcu_read_lock();
> > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > >  		if (e->gsi == irq) {
> > > >  			int r = e->set(e, kvm, sig_level);
> > > >  			if (r < 0)
> > > > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >
> > > >  			ret = r + ((ret < 0) ? 0 : ret);
> > > >  		}
> > > > +	}
> > > > +	rcu_read_unlock();
> > > >  	return ret;
> > > >  }
> > > >
> > > > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > > >
> > > >  	trace_kvm_ack_irq(irqchip, pin);
> > > >
> > > > -	list_for_each_entry(e, &kvm->irq_routing, link)
> > > > +	rcu_read_lock();
> > > > +	for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > >  		if (e->irqchip.irqchip == irqchip &&
> > > >  		    e->irqchip.pin == pin) {
> > > >  			gsi = e->gsi;
> > > >  			break;
> > > >  		}
> > > > +	}
> > > > +	rcu_read_unlock();
> > > >
> > > >  	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > > >  		if (kian->gsi == gsi)
> > > > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > >  			kimn->func(kimn, mask);
> > > >  }
> > > >
> > > > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > > > -{
> > > > -	struct kvm_kernel_irq_routing_entry *e, *n;
> > > > -
> > > > -	list_for_each_entry_safe(e, n, irq_routing, link)
> > > > -		kfree(e);
> > > > -}
> > > > -
> > > >  void kvm_free_irq_routing(struct kvm *kvm)
> > > >  {
> > > > -	mutex_lock(&kvm->irq_lock);
> > > > -	__kvm_free_irq_routing(&kvm->irq_routing);
> > > > -	mutex_unlock(&kvm->irq_lock);
> > > > +	/* Called only during vm destruction. Nobody can use the pointer
> > > > +	   at this stage */
> > > > +	kfree(kvm->irq_routing);
> > > >  }
> > > >
> > > >  static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > > >  			unsigned nr,
> > > >  			unsigned flags)
> > > >  {
> > > > -	struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > > > -	struct list_head tmp = LIST_HEAD_INIT(tmp);
> > > > -	struct kvm_kernel_irq_routing_entry *e = NULL;
> > > > +	struct kvm_kernel_irq_routing_entry *new, *old;
> > > >  	unsigned i;
> > > >  	int r;
> > > >
> > > > +	/* last element is left zeroed and indicates the end of the array */
> > > > +	new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > >
> > > There are up to 1K entries, and each one seems to be around 32 bytes.
> > > Is there a chance that we won't be able to find such a chunk of
> > > contiguous memory on a busy system?
> > >
> > > Since the tmp list is never traversed while it is assigned, we can, instead,
> > > build a new list as we did and simply replace list_splice with these bits from
> > > list_splice_init_rcu:
> > >
> > > static inline void list_splice_tmp_rcu(struct list_head *tmp,
> > > 				       struct list_head *head) {
> > > 	struct list_head *first = tmp->next;
> > > 	struct list_head *last = tmp->prev;
> > > 	struct list_head *at = head->next;
> > >
> > > 	last->next = at;
> > > 	rcu_assign_pointer(head->next, first);
> > > 	first->prev = head;
> > > 	at->prev = last;
> > > }
> > >
> > Let's keep simple things simple. If there is a real concern that 3-4
> > contiguous pages will not be available, we can use vmalloc() here.
>
> Hmm, 32 * (1K + 1) is usually 8-9 pages, and vmalloc is a finite

We allocate only the existing entries, not the whole array, and that
usually means fewer than 20 entries. If we ever reach the point where
the current data structure holds 1K entries, the much more serious
problem will be that every injected interrupt has to scan those 1K
entries.

> resource. Maybe it's a good idea to use an array instead of a list. All
> I'm saying, RCU does not force you to do this.
>
It doesn't, but a list shouldn't be used here in the first place.

> > But the
> > not-so-long-term plan is to stop using the irq routing table for MSI
> > injection (a new ioctl, kvm_msi_inject) and reduce the table to a much
> > smaller size (maybe make it a hash).
> Why bother with an array as an intermediate step then?

Incremental changes. I can't rewrite the whole kernel with one patch.
Linus will reject it.
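
For reference, below is a rough, self-contained user-space sketch of the
read-side lookup over the sentinel-terminated array. The names (struct route,
set_stub, set_irq) are made up for illustration and are not the kernel code;
in kvm_set_irq() the walk runs between rcu_read_lock() and rcu_read_unlock()
and fetches the table pointer with rcu_dereference():

#include <stdio.h>
#include <stdlib.h>

struct route {
	int gsi;                                 /* guest interrupt number */
	int (*set)(struct route *e, int level);  /* NULL only in the sentinel */
};

static int set_stub(struct route *e, int level)
{
	printf("inject gsi %d, level %d\n", e->gsi, level);
	return 1;
}

/* Scan every entry matching irq; the zeroed last element ends the walk. */
static int set_irq(struct route *table, int irq, int level)
{
	struct route *e;
	int ret = -1;

	/* kernel: rcu_read_lock(); table = rcu_dereference(kvm->irq_routing); */
	for (e = table; e && e->set; e++) {
		if (e->gsi != irq)
			continue;
		int r = e->set(e, level);
		if (r < 0)
			continue;
		ret = r + ((ret < 0) ? 0 : ret);
	}
	/* kernel: rcu_read_unlock(); */
	return ret;
}

int main(void)
{
	/* nr + 1 entries; calloc leaves the last one zeroed as the terminator */
	struct route *table = calloc(2 + 1, sizeof(*table));
	int ret;

	if (!table)
		return 1;
	table[0] = (struct route){ .gsi = 4, .set = set_stub };
	table[1] = (struct route){ .gsi = 4, .set = set_stub };
	ret = set_irq(table, 4, 1);
	free(table);
	return ret < 0;
}

The zeroed terminating entry lets readers walk the array without knowing its
length, so the only thing that has to be published atomically is the base
pointer.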
> > > >
> > > > +
> > > > +	if (!new)
> > > > +		return -ENOMEM;
> > > > +
> > > >  	for (i = 0; i < nr; ++i) {
> > > >  		r = -EINVAL;
> > > >  		if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > > >  			goto out;
> > > >  		if (ue->flags)
> > > >  			goto out;
> > > > -		r = -ENOMEM;
> > > > -		e = kzalloc(sizeof(*e), GFP_KERNEL);
> > > > -		if (!e)
> > > > -			goto out;
> > > > -		r = setup_routing_entry(e, ue);
> > > > +		r = setup_routing_entry(new + i, ue);
> > > >  		if (r)
> > > >  			goto out;
> > > >  		++ue;
> > > > -		list_add(&e->link, &irq_list);
> > > > -		e = NULL;
> > > >  	}
> > > >
> > > >  	mutex_lock(&kvm->irq_lock);
> > > > -	list_splice(&kvm->irq_routing, &tmp);
> > > > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > > > -	list_splice(&irq_list, &kvm->irq_routing);
> > > > -	INIT_LIST_HEAD(&irq_list);
> > > > -	list_splice(&tmp, &irq_list);
> > > > +	old = kvm->irq_routing;
> > > > +	rcu_assign_pointer(kvm->irq_routing, new);
> > > >  	mutex_unlock(&kvm->irq_lock);
> > > >
> > > > +	synchronize_rcu();
> > > > +
> > > >  	r = 0;
> > > > +	new = old;
> > > >
> > > >  out:
> > > > -	kfree(e);
> > > > -	__kvm_free_irq_routing(&irq_list);
> > > > +	kfree(new);
> > > >  	return r;
> > > >  }
> > > >
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index cf20dc1..24013b4 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > > >  	if (IS_ERR(kvm))
> > > >  		goto out;
> > > >  #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > -	INIT_LIST_HEAD(&kvm->irq_routing);
> > > >  	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > > >  #endif
> > > >
> > > > --
> > > > 1.6.2.1
> > > >
> >
> > --
> > 			Gleb.

--
			Gleb.
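
As a closing illustration, here is a similarly rough user-space sketch of the
update-side ordering kvm_set_irq_routing() relies on. The names (struct route,
irq_routing, irq_lock, set_routing, set_stub) are stand-ins rather than kernel
code; irq_lock only serializes writers, and the comments mark where the kernel
uses rcu_assign_pointer() and synchronize_rcu() before the old array may be
freed:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct route {
	int gsi;
	int (*set)(struct route *e, int level);  /* NULL only in the sentinel */
};

static int set_stub(struct route *e, int level)
{
	(void)e; (void)level;
	return 1;
}

static struct route *irq_routing;   /* current table, scanned by readers */
static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;

static int set_routing(const struct route *entries, unsigned nr)
{
	struct route *new, *old;

	/* last element stays zeroed and marks the end of the array */
	new = calloc(nr + 1, sizeof(*new));
	if (!new)
		return -1;
	memcpy(new, entries, nr * sizeof(*new));

	pthread_mutex_lock(&irq_lock);       /* serializes writers only */
	old = irq_routing;
	irq_routing = new;                   /* kernel: rcu_assign_pointer() */
	pthread_mutex_unlock(&irq_lock);

	/* kernel: synchronize_rcu() -- no reader can still hold 'old' after it */
	free(old);                           /* kernel: kfree(old); NULL is fine */
	return 0;
}

int main(void)
{
	struct route r[1] = { { .gsi = 4, .set = set_stub } };

	/* publish a table twice; the second call frees the first one */
	return set_routing(r, 1) || set_routing(r, 1);
}

Keeping irq_lock only around the pointer swap is what leaves kvm_set_irq()
lock-free for readers; the cost is that every routing update waits out one
grace period before the old table can be reclaimed.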