> -----Original Message----- > From: kvm-owner@xxxxxxxxxxxxxxx [mailto:kvm-owner@xxxxxxxxxxxxxxx] On > Behalf Of Paolo Bonzini > Sent: Wednesday, August 05, 2015 11:24 PM > To: linux-kernel@xxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx > Cc: Steve Rutherford; rkrcmar@xxxxxxxxxx > Subject: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference > > From: Steve Rutherford <srutherford@xxxxxxxxxx> > > In order to support a userspace IOAPIC interacting with an in kernel > APIC, the EOI exit bitmaps need to be configurable. > > If the IOAPIC is in userspace (i.e. the irqchip has been split), the > EOI exit bitmaps will be set whenever the GSI Routes are configured. > In particular, for the low MSI routes are reservable for userspace > IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the > destination vector of the route will be set for the destination VCPU. > > The intention is for the userspace IOAPICs to use the reservable MSI > routes to inject interrupts into the guest. > > This is a slight abuse of the notion of an MSI Route, given that MSIs > classically bypass the IOAPIC. It might be worthwhile to add an > additional route type to improve clarity. > > Compile tested for Intel x86. > > Signed-off-by: Steve Rutherford <srutherford@xxxxxxxxxx> > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > Documentation/virtual/kvm/api.txt | 9 ++++++--- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/ioapic.h | 2 ++ > arch/x86/kvm/irq_comm.c | 42 > +++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/lapic.c | 3 +-- > arch/x86/kvm/x86.c | 9 ++++++++- > include/linux/kvm_host.h | 17 ++++++++++++++++ > virt/kvm/irqchip.c | 12 ++--------- > 8 files changed, 79 insertions(+), 16 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt > b/Documentation/virtual/kvm/api.txt > index bda6cb747b23..dcd748e2d46d 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -3635,7 +3635,7 @@ KVM handlers should exit to userspace with rc = > -EREMOTE. > 7.5 KVM_CAP_SPLIT_IRQCHIP > > Architectures: x86 > -Parameters: None > +Parameters: args[0] - number of routes reserved for userspace IOAPICs > Returns: 0 on success, -1 on error > > Create a local apic for each processor in the kernel. This can be used > @@ -3643,8 +3643,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace > VMM wishes to emulate the > IOAPIC and PIC (and also the PIT, even though this has to be enabled > separately). > > -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in > kernel > -IOAPIC or PIC. This also enables in kernel routing of interrupt requests. > +This capability also enables in kernel routing of interrupt requests; > +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type > are > +used in the IRQ routing table. The first args[0] MSI routes are reserved > +for the IOAPIC pins. Whenever the LAPIC receives an EOI for these routes, > +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace. > > Fails if VCPU has already been created, or if the irqchip is already in the > kernel (i.e. KVM_CREATE_IRQCHIP has already been called). > diff --git a/arch/x86/include/asm/kvm_host.h > b/arch/x86/include/asm/kvm_host.h > index 4294722dfd1d..4bc714f7b164 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -687,6 +687,7 @@ struct kvm_arch { > u64 disabled_quirks; > > bool irqchip_split; > + u8 nr_reserved_ioapic_pins; > }; > > struct kvm_vm_stat { > diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h > index a8842c0dee73..084617d37c74 100644 > --- a/arch/x86/kvm/ioapic.h > +++ b/arch/x86/kvm/ioapic.h > @@ -9,6 +9,7 @@ struct kvm; > struct kvm_vcpu; > > #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS > +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES > #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ > #define IOAPIC_EDGE_TRIG 0 > #define IOAPIC_LEVEL_TRIG 1 > @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct > kvm_lapic *src, > int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); > int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); > void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); > +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); > > #endif > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > index 67f6b62a6814..177460998bb0 100644 > --- a/arch/x86/kvm/irq_comm.c > +++ b/arch/x86/kvm/irq_comm.c > @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) > { > return kvm_set_irq_routing(kvm, empty_routing, 0, 0); > } > + > +void kvm_arch_irq_routing_update(struct kvm *kvm) > +{ > + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) > + return; > + kvm_make_scan_ioapic_request(kvm); > +} > + > +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) > +{ > + struct kvm *kvm = vcpu->kvm; > + struct kvm_kernel_irq_routing_entry *entry; > + struct kvm_irq_routing_table *table; > + u32 i, nr_ioapic_pins; > + int idx; > + > + /* kvm->irq_routing must be read after clearing > + * KVM_SCAN_IOAPIC. */ > + smp_mb(); > + idx = srcu_read_lock(&kvm->irq_srcu); > + table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); > + nr_ioapic_pins = min_t(u32, table->nr_rt_entries, > + kvm->arch.nr_reserved_ioapic_pins); > + for (i = 0; i < nr_ioapic_pins; ++i) { > + hlist_for_each_entry(entry, &table->map[i], link) { > + u32 dest_id, dest_mode; > + > + if (entry->type != KVM_IRQ_ROUTING_MSI) > + continue; > + dest_id = (entry->msi.address_lo >> 12) & 0xff; > + dest_mode = (entry->msi.address_lo >> 2) & 0x1; > + if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id, > + dest_mode)) { > + u32 vector = entry->msi.data & 0xff; > + > + __set_bit(vector, > + (unsigned long *) eoi_exit_bitmap); > + } > + } > + } > + srcu_read_unlock(&kvm->irq_srcu, idx); > +} > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 14b5603ef6c5..38c580aa27c2 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -209,8 +209,7 @@ out: > if (old) > kfree_rcu(old, rcu); > > - if (ioapic_in_kernel(kvm)) > - kvm_vcpu_request_scan_ioapic(kvm); > + kvm_make_scan_ioapic_request(kvm); > } > > static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 16e2f3c577c7..c9fb11491aa3 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3571,6 +3571,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm > *kvm, > break; > case KVM_CAP_SPLIT_IRQCHIP: { > mutex_lock(&kvm->lock); > + r = -EINVAL; > + if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) > + goto split_irqchip_unlock; > r = -EEXIST; > if (irqchip_in_kernel(kvm)) > goto split_irqchip_unlock; > @@ -3582,6 +3585,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm > *kvm, > /* Pairs with irqchip_in_kernel. */ > smp_wmb(); > kvm->arch.irqchip_split = true; > + kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; > r = 0; > split_irqchip_unlock: > mutex_unlock(&kvm->lock); > @@ -6173,7 +6177,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu > *vcpu) > > memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); > > - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); > + if (irqchip_split(vcpu->kvm)) > + kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); > + else > + kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); > kvm_x86_ops->load_eoi_exitmap(vcpu); > } > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 653d494e13d1..27ccdf91a465 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -329,6 +329,19 @@ struct kvm_kernel_irq_routing_entry { > struct hlist_node link; > }; > > +#ifdef CONFIG_HAVE_KVM_IRQCHIP > +struct kvm_irq_routing_table { > + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; > + struct kvm_kernel_irq_routing_entry *rt_entries; This filed doesn't exist anymore. In fact, this changes is also in my VT-d PI patches. If this series get merged first, I can rebase my patches then. Thanks, Feng > + u32 nr_rt_entries; > + /* > + * Array indexed by gsi. Each entry contains list of irq chips > + * the gsi is connected to. > + */ > + struct hlist_head map[0]; > +}; > +#endif > + > #ifndef KVM_PRIVATE_MEM_SLOTS > #define KVM_PRIVATE_MEM_SLOTS 0 > #endif > @@ -455,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu); > > #ifdef __KVM_HAVE_IOAPIC > void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); > +void kvm_arch_irq_routing_update(struct kvm *kvm); > #else > static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) > { > } > +static inline void kvm_arch_irq_routing_update(struct kvm *kvm) > +{ > +} > #endif > > #ifdef CONFIG_HAVE_KVM_IRQFD > diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c > index 21c14244f4c4..4f85c6ee96dc 100644 > --- a/virt/kvm/irqchip.c > +++ b/virt/kvm/irqchip.c > @@ -31,16 +31,6 @@ > #include <trace/events/kvm.h> > #include "irq.h" > > -struct kvm_irq_routing_table { > - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; > - u32 nr_rt_entries; > - /* > - * Array indexed by gsi. Each entry contains list of irq chips > - * the gsi is connected to. > - */ > - struct hlist_head map[0]; > -}; > - > int kvm_irq_map_gsi(struct kvm *kvm, > struct kvm_kernel_irq_routing_entry *entries, int gsi) > { > @@ -227,6 +217,8 @@ int kvm_set_irq_routing(struct kvm *kvm, > kvm_irq_routing_update(kvm); > mutex_unlock(&kvm->irq_lock); > > + kvm_arch_irq_routing_update(kvm); > + > synchronize_srcu_expedited(&kvm->irq_srcu); > > new = old; > -- > 1.8.3.1 > > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html