On Mon, Oct 17, 2011 at 11:27:46AM +0200, Jan Kiszka wrote: > This cache will help us implementing KVM in-kernel irqchip support > without spreading hooks all over the place. > > KVM requires us to register it first and then deliver it by raising a > pseudo IRQ line returned on registration. While this could be changed > for QEMU-originated MSI messages by adding direct MSI injection, we will > still need this translation for irqfd-originated messages. The > MSIRoutingCache will allow to track those registrations and update them > lazily before the actual delivery. This avoid having to track MSI > vectors at device level (like qemu-kvm currently does). > > Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> So if many devices are added, exhausting the number of GSIs supported, we get terrible performance instead of simply failing outright. To me, this looks more like a bug than a feature ... > --- > hw/apic.c | 5 +++-- > hw/apic.h | 2 +- > hw/msi.c | 10 +++++++--- > hw/msi.h | 14 +++++++++++++- > hw/msix.c | 7 ++++++- > hw/pc.c | 4 ++-- > hw/pci.h | 4 ++++ > qemu-common.h | 1 + > 8 files changed, 37 insertions(+), 10 deletions(-) > > diff --git a/hw/apic.c b/hw/apic.c > index c1d557d..6811ae1 100644 > --- a/hw/apic.c > +++ b/hw/apic.c > @@ -804,7 +804,7 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) > return val; > } > > -void apic_deliver_msi(MSIMessage *msg) > +void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache) > { > uint8_t dest = > (msg->address & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; > @@ -829,8 +829,9 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) > * Mapping them on the global bus happens to work because > * MSI registers are reserved in APIC MMIO and vice versa. 
*/ > MSIMessage msg = { .address = addr, .data = val }; > + static MSIRoutingCache cache; > > - msi_deliver(&msg); > + msi_deliver(&msg, &cache); > return; > } > > diff --git a/hw/apic.h b/hw/apic.h > index fa848fd..353ea3a 100644 > --- a/hw/apic.h > +++ b/hw/apic.h > @@ -18,7 +18,7 @@ void cpu_set_apic_tpr(DeviceState *s, uint8_t val); > uint8_t cpu_get_apic_tpr(DeviceState *s); > void apic_init_reset(DeviceState *s); > void apic_sipi(DeviceState *s); > -void apic_deliver_msi(MSIMessage *msg); > +void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache); > > /* pc.c */ > int cpu_is_bsp(CPUState *env); > diff --git a/hw/msi.c b/hw/msi.c > index 9055155..c8ccb17 100644 > --- a/hw/msi.c > +++ b/hw/msi.c > @@ -40,13 +40,13 @@ > /* Flag for interrupt controller to declare MSI/MSI-X support */ > bool msi_supported; > > -static void msi_unsupported(MSIMessage *msg) > +static void msi_unsupported(MSIMessage *msg, MSIRoutingCache *cache) > { > /* If we get here, the board failed to register a delivery handler. */ > abort(); > } > > -void (*msi_deliver)(MSIMessage *msg) = msi_unsupported; > +void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache) = msi_unsupported; > > /* If we get rid of cap allocator, we won't need this. 
*/ > static inline uint8_t msi_cap_sizeof(uint16_t flags) > @@ -288,6 +288,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, > 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); > } > > + dev->msi_cache = g_malloc0(nr_vectors * sizeof(*dev->msi_cache)); > + > if (kvm_enabled() && kvm_irqchip_in_kernel()) { > dev->msi_irq_entries = g_malloc(nr_vectors * > sizeof(*dev->msix_irq_entries)); > @@ -312,6 +314,8 @@ void msi_uninit(struct PCIDevice *dev) > g_free(dev->msi_irq_entries); > } > > + g_free(dev->msi_cache); > + > pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); > dev->cap_present &= ~QEMU_PCI_CAP_MSI; > > @@ -389,7 +393,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) > "notify vector 0x%x" > " address: 0x%"PRIx64" data: 0x%"PRIx32"\n", > vector, msg.address, msg.data); > - msi_deliver(&msg); > + msi_deliver(&msg, &dev->msi_cache[vector]); > } > > /* Normally called by pci_default_write_config(). */ > diff --git a/hw/msi.h b/hw/msi.h > index f3152f3..20ae215 100644 > --- a/hw/msi.h > +++ b/hw/msi.h > @@ -29,6 +29,18 @@ struct MSIMessage { > uint32_t data; > }; > > +typedef enum { > + MSI_ROUTE_NONE = 0, > + MSI_ROUTE_STATIC, > +} MSIRouteType; > + > +struct MSIRoutingCache { > + MSIMessage msg; > + MSIRouteType type; > + int kvm_gsi; > + int kvm_irqfd; > +}; > + > extern bool msi_supported; > > bool msi_enabled(const PCIDevice *dev); > @@ -46,6 +58,6 @@ static inline bool msi_present(const PCIDevice *dev) > return dev->cap_present & QEMU_PCI_CAP_MSI; > } > > -extern void (*msi_deliver)(MSIMessage *msg); > +extern void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache); > > #endif /* QEMU_MSI_H */ > diff --git a/hw/msix.c b/hw/msix.c > index 08cc526..e824aef 100644 > --- a/hw/msix.c > +++ b/hw/msix.c > @@ -358,6 +358,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, > if (ret) > goto err_config; > > + dev->msix_cache = g_malloc0(nentries * sizeof *dev->msix_cache); > + > if (kvm_enabled() && kvm_irqchip_in_kernel()) { > 
dev->msix_irq_entries = g_malloc(nentries * > sizeof *dev->msix_irq_entries); > @@ -409,6 +411,9 @@ int msix_uninit(PCIDevice *dev, MemoryRegion *bar) > dev->msix_entry_used = NULL; > g_free(dev->msix_irq_entries); > dev->msix_irq_entries = NULL; > + > + g_free(dev->msix_cache); > + > dev->cap_present &= ~QEMU_PCI_CAP_MSIX; > return 0; > } > @@ -478,7 +483,7 @@ void msix_notify(PCIDevice *dev, unsigned vector) > > msix_message_from_vector(dev, vector, &msg); > > - msi_deliver(&msg); > + msi_deliver(&msg, &dev->msix_cache[vector]); > } > > void msix_reset(PCIDevice *dev) > diff --git a/hw/pc.c b/hw/pc.c > index 7d29a4a..4d8b524 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -103,10 +103,10 @@ void isa_irq_handler(void *opaque, int n, int level) > qemu_set_irq(isa->ioapic[n], level); > }; > > -static void pc_msi_deliver(MSIMessage *msg) > +static void pc_msi_deliver(MSIMessage *msg, MSIRoutingCache *cache) > { > if ((msg->address & 0xfff00000) == MSI_ADDR_BASE) { > - apic_deliver_msi(msg); > + apic_deliver_msi(msg, cache); > } else { > stl_phys(msg->address, msg->data); > } > diff --git a/hw/pci.h b/hw/pci.h > index 329ab32..5b5d2fd 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -197,6 +197,10 @@ struct PCIDevice { > MemoryRegion rom; > uint32_t rom_bar; > > + /* MSI routing chaches */ > + MSIRoutingCache *msi_cache; > + MSIRoutingCache *msix_cache; > + > /* MSI entries */ > int msi_entries_nr; > struct KVMMsiMessage *msi_irq_entries; > diff --git a/qemu-common.h b/qemu-common.h > index d3901bd..c1d1614 100644 > --- a/qemu-common.h > +++ b/qemu-common.h > @@ -16,6 +16,7 @@ typedef struct QEMUFile QEMUFile; > typedef struct QEMUBH QEMUBH; > typedef struct DeviceState DeviceState; > typedef struct MSIMessage MSIMessage; > +typedef struct MSIRoutingCache MSIRoutingCache; > > struct Monitor; > typedef struct Monitor Monitor; > -- > 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo 
info at http://vger.kernel.org/majordomo-info.html