This cache will help us implement KVM in-kernel irqchip support without spreading hooks all over the place. KVM requires us to register an MSI message first and then deliver it by raising the pseudo IRQ line returned on registration. While this could be changed for QEMU-originated MSI messages by adding direct MSI injection, we will still need this translation for irqfd-originated messages. The MSIRoutingCache will allow us to track those registrations and update them lazily before the actual delivery. This avoids having to track MSI vectors at device level (like qemu-kvm currently does). Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- hw/apic.c | 5 +++-- hw/apic.h | 2 +- hw/msi.c | 10 +++++++--- hw/msi.h | 14 +++++++++++++- hw/msix.c | 7 ++++++- hw/pc.c | 4 ++-- hw/pci.h | 4 ++++ qemu-common.h | 1 + 8 files changed, 37 insertions(+), 10 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index c1d557d..6811ae1 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -804,7 +804,7 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr) return val; } -void apic_deliver_msi(MSIMessage *msg) +void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache) { uint8_t dest = (msg->address & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; @@ -829,8 +829,9 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) * Mapping them on the global bus happens to work because * MSI registers are reserved in APIC MMIO and vice versa. 
*/ MSIMessage msg = { .address = addr, .data = val }; + static MSIRoutingCache cache; - msi_deliver(&msg); + msi_deliver(&msg, &cache); return; } diff --git a/hw/apic.h b/hw/apic.h index fa848fd..353ea3a 100644 --- a/hw/apic.h +++ b/hw/apic.h @@ -18,7 +18,7 @@ void cpu_set_apic_tpr(DeviceState *s, uint8_t val); uint8_t cpu_get_apic_tpr(DeviceState *s); void apic_init_reset(DeviceState *s); void apic_sipi(DeviceState *s); -void apic_deliver_msi(MSIMessage *msg); +void apic_deliver_msi(MSIMessage *msg, MSIRoutingCache *cache); /* pc.c */ int cpu_is_bsp(CPUState *env); diff --git a/hw/msi.c b/hw/msi.c index 9055155..c8ccb17 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -40,13 +40,13 @@ /* Flag for interrupt controller to declare MSI/MSI-X support */ bool msi_supported; -static void msi_unsupported(MSIMessage *msg) +static void msi_unsupported(MSIMessage *msg, MSIRoutingCache *cache) { /* If we get here, the board failed to register a delivery handler. */ abort(); } -void (*msi_deliver)(MSIMessage *msg) = msi_unsupported; +void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache) = msi_unsupported; /* If we get rid of cap allocator, we won't need this. 
*/ static inline uint8_t msi_cap_sizeof(uint16_t flags) @@ -288,6 +288,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); } + dev->msi_cache = g_malloc0(nr_vectors * sizeof(*dev->msi_cache)); + if (kvm_enabled() && kvm_irqchip_in_kernel()) { dev->msi_irq_entries = g_malloc(nr_vectors * sizeof(*dev->msix_irq_entries)); @@ -312,6 +314,8 @@ void msi_uninit(struct PCIDevice *dev) g_free(dev->msi_irq_entries); } + g_free(dev->msi_cache); + pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); dev->cap_present &= ~QEMU_PCI_CAP_MSI; @@ -389,7 +393,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) "notify vector 0x%x" " address: 0x%"PRIx64" data: 0x%"PRIx32"\n", vector, msg.address, msg.data); - msi_deliver(&msg); + msi_deliver(&msg, &dev->msi_cache[vector]); } /* Normally called by pci_default_write_config(). */ diff --git a/hw/msi.h b/hw/msi.h index f3152f3..20ae215 100644 --- a/hw/msi.h +++ b/hw/msi.h @@ -29,6 +29,18 @@ struct MSIMessage { uint32_t data; }; +typedef enum { + MSI_ROUTE_NONE = 0, + MSI_ROUTE_STATIC, +} MSIRouteType; + +struct MSIRoutingCache { + MSIMessage msg; + MSIRouteType type; + int kvm_gsi; + int kvm_irqfd; +}; + extern bool msi_supported; bool msi_enabled(const PCIDevice *dev); @@ -46,6 +58,6 @@ static inline bool msi_present(const PCIDevice *dev) return dev->cap_present & QEMU_PCI_CAP_MSI; } -extern void (*msi_deliver)(MSIMessage *msg); +extern void (*msi_deliver)(MSIMessage *msg, MSIRoutingCache *cache); #endif /* QEMU_MSI_H */ diff --git a/hw/msix.c b/hw/msix.c index 08cc526..e824aef 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -358,6 +358,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, if (ret) goto err_config; + dev->msix_cache = g_malloc0(nentries * sizeof *dev->msix_cache); + if (kvm_enabled() && kvm_irqchip_in_kernel()) { dev->msix_irq_entries = g_malloc(nentries * sizeof *dev->msix_irq_entries); @@ -409,6 +411,9 @@ int msix_uninit(PCIDevice *dev, MemoryRegion 
*bar) dev->msix_entry_used = NULL; g_free(dev->msix_irq_entries); dev->msix_irq_entries = NULL; + + g_free(dev->msix_cache); + dev->cap_present &= ~QEMU_PCI_CAP_MSIX; return 0; } @@ -478,7 +483,7 @@ void msix_notify(PCIDevice *dev, unsigned vector) msix_message_from_vector(dev, vector, &msg); - msi_deliver(&msg); + msi_deliver(&msg, &dev->msix_cache[vector]); } void msix_reset(PCIDevice *dev) diff --git a/hw/pc.c b/hw/pc.c index 7d29a4a..4d8b524 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -103,10 +103,10 @@ void isa_irq_handler(void *opaque, int n, int level) qemu_set_irq(isa->ioapic[n], level); }; -static void pc_msi_deliver(MSIMessage *msg) +static void pc_msi_deliver(MSIMessage *msg, MSIRoutingCache *cache) { if ((msg->address & 0xfff00000) == MSI_ADDR_BASE) { - apic_deliver_msi(msg); + apic_deliver_msi(msg, cache); } else { stl_phys(msg->address, msg->data); } diff --git a/hw/pci.h b/hw/pci.h index 329ab32..5b5d2fd 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -197,6 +197,10 @@ struct PCIDevice { MemoryRegion rom; uint32_t rom_bar; + /* MSI routing caches */ + MSIRoutingCache *msi_cache; + MSIRoutingCache *msix_cache; + /* MSI entries */ int msi_entries_nr; struct KVMMsiMessage *msi_irq_entries; diff --git a/qemu-common.h b/qemu-common.h index d3901bd..c1d1614 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -16,6 +16,7 @@ typedef struct QEMUFile QEMUFile; typedef struct QEMUBH QEMUBH; typedef struct DeviceState DeviceState; typedef struct MSIMessage MSIMessage; +typedef struct MSIRoutingCache MSIRoutingCache; struct Monitor; typedef struct Monitor Monitor; -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html