Ack and mask notifiers typically call back into kvm_set_irq and thus may iterate over all VCPUs of a VM. Better keep this path preemptible to prevent user space from massively influencing scheduling latencies. Use sleepable RCU for the protection of irq_routing and the notifier lists. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- include/linux/kvm_host.h | 1 + virt/kvm/irq_comm.c | 31 +++++++++++++++++-------------- virt/kvm/kvm_main.c | 5 +++++ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bcf71c7..83bf8e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -238,6 +238,7 @@ struct kvm { #endif struct mutex irq_lock; + struct srcu_struct irq_srcu; #ifdef CONFIG_HAVE_KVM_IRQCHIP struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 8edca91..2331587 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -150,6 +150,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) int ret = -1, i = 0; struct kvm_irq_routing_table *irq_rt; struct hlist_node *n; + int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -157,12 +158,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. 
*/ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, n, &irq_rt->map[irq], link) irq_set[i++] = *e; - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { int r; @@ -180,18 +181,19 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; struct hlist_node *n; - int gsi; + int gsi, idx; trace_kvm_ack_irq(irqchip, pin); - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, + &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -208,7 +210,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); } int kvm_request_irq_source_id(struct kvm *kvm) @@ -276,7 +278,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, mutex_lock(&kvm->irq_lock); hlist_del_rcu(&kimn->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); } void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, @@ -284,15 +286,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, { struct kvm_irq_mask_notifier *kimn; struct hlist_node *n; - int gsi; + int gsi, idx; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, + &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi 
!= -1) hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } void kvm_free_irq_routing(struct kvm *kvm) @@ -411,7 +414,7 @@ int kvm_set_irq_routing(struct kvm *kvm, old = kvm->irq_routing; rcu_assign_pointer(kvm->irq_routing, new); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); new = old; r = 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6cfcde7..b0ac2c0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -404,6 +404,8 @@ static struct kvm *kvm_create_vm(void) goto out_err_nosrcu; if (init_srcu_struct(&kvm->srcu)) goto out_err_nosrcu; + if (init_srcu_struct(&kvm->irq_srcu)) + goto out_err_noirqsrcu; for (i = 0; i < KVM_NR_BUSES; i++) { kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); @@ -431,6 +433,8 @@ out: return kvm; out_err: + cleanup_srcu_struct(&kvm->irq_srcu); +out_err_noirqsrcu: cleanup_srcu_struct(&kvm->srcu); out_err_nosrcu: hardware_disable_all(); @@ -511,6 +515,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #else kvm_arch_flush_shadow(kvm); #endif + cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_destroy_vm(kvm); hardware_disable_all(); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html