Support msi-x with irqchip in kernel: allocate entries when they are
used, and update when they are unmasked.

Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
---

Changes since v3: style comments from Avi
Changes since v2: move all kvm-specific code into kvm_msix_XXX helpers
Changes since v1: add braces as per CODING_STYLE

 hw/apic.c    |    4 +-
 hw/msix.c    |  148 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/pci.h     |    3 +
 libkvm-all.h |   18 ++++++
 qemu-kvm.c   |   44 ++++++++++++++++++++
 5 files changed, 212 insertions(+), 5 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 778a853..cdb5972 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -1088,9 +1088,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
-    if (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel()) {
-        msix_supported = 1;
-    }
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
diff --git a/hw/msix.c b/hw/msix.c
index 4224d8f..6c9ad0f 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -14,6 +14,7 @@
 #include "hw.h"
 #include "msix.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* Declaration from linux/pci_regs.h */
 #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
@@ -62,6 +63,116 @@
 /* Flag for interrupt controller to declare MSI-X support */
 int msix_supported;
 
+/* KVM specific MSIX helpers */
+
+/* Remove the routing entries of all vectors in use. The routing table is
+ * committed once, and only if at least one entry was removed. */
+static void kvm_msix_free(PCIDevice *dev)
+{
+    int vector, changed = 0;
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+        if (dev->msix_entry_used[vector]) {
+            kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+            changed = 1;
+        }
+    }
+    if (changed) {
+        kvm_commit_irq_routes(kvm_context);
+    }
+}
+
+/* Fill in @entry as an MSI route from the address and data currently stored
+ * in the MSI-X table for @vector. entry->gsi is left untouched. */
+static void kvm_msix_routing_entry(PCIDevice *dev, unsigned vector,
+                                   struct kvm_irq_routing_entry *entry)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    entry->type = KVM_IRQ_ROUTING_MSI;
+    entry->flags = 0;
+    entry->u.msi.address_lo = pci_get_long(table_entry + MSIX_MSG_ADDR);
+    entry->u.msi.address_hi = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    entry->u.msi.data = pci_get_long(table_entry + MSIX_MSG_DATA);
+}
+
+/* Refresh the kernel route of @vector after its table entry was written.
+ * Exits on failure to update or commit the routing table. */
+static void kvm_msix_update(PCIDevice *dev, int vector,
+                            int was_masked, int is_masked)
+{
+    struct kvm_irq_routing_entry e = {}, *entry;
+    int mask_cleared = was_masked && !is_masked;
+    /* It is only legal to change an entry when it is masked. Therefore, it is
+     * enough to update the routing in kernel when mask is being cleared. */
+    if (!mask_cleared) {
+        return;
+    }
+    if (!dev->msix_entry_used[vector]) {
+        return;
+    }
+    entry = dev->msix_irq_entries + vector;
+    e.gsi = entry->gsi;
+    kvm_msix_routing_entry(dev, vector, &e);
+    if (memcmp(&entry->u.msi, &e.u.msi, sizeof entry->u.msi)) {
+        int r;
+        r = kvm_update_routing_entry(kvm_context, entry, &e);
+        if (r) {
+            fprintf(stderr, "%s: kvm_update_routing_entry failed: %d\n", __func__,
+                    r);
+            exit(1);
+        }
+        memcpy(&entry->u.msi, &e.u.msi, sizeof entry->u.msi);
+        r = kvm_commit_irq_routes(kvm_context);
+        if (r) {
+            fprintf(stderr, "%s: kvm_commit_irq_routes failed: %d\n", __func__,
+                    r);
+            exit(1);
+        }
+    }
+}
+
+/* Obtain a GSI for @vector, add an MSI route for it and commit the table.
+ * Returns 0 on success, a negative value on failure.
+ * NOTE(review): the GSI from kvm_get_irq_route_gsi() is not released on the
+ * error paths - confirm whether it needs to be. */
+static int kvm_msix_add(PCIDevice *dev, unsigned vector)
+{
+    struct kvm_irq_routing_entry *entry = dev->msix_irq_entries + vector;
+    int r;
+
+    r = kvm_get_irq_route_gsi(kvm_context);
+    if (r < 0) {
+        fprintf(stderr, "%s: kvm_get_irq_route_gsi failed: %d\n", __func__, r);
+        return r;
+    }
+    entry->gsi = r;
+    kvm_msix_routing_entry(dev, vector, entry);
+    r = kvm_add_routing_entry(kvm_context, entry);
+    if (r < 0) {
+        fprintf(stderr, "%s: kvm_add_routing_entry failed: %d\n", __func__, r);
+        return r;
+    }
+
+    r = kvm_commit_irq_routes(kvm_context);
+    if (r < 0) {
+        fprintf(stderr, "%s: kvm_commit_irq_routes failed: %d\n", __func__, r);
+        /* Best effort: do not leave an entry in the temporary table for a
+         * vector the caller will treat as having no route. */
+        kvm_del_routing_entry(kvm_context, entry);
+        return r;
+    }
+    return 0;
+}
+
+/* Remove the kernel route of @vector once its use count has dropped to 0. */
+static void kvm_msix_del(PCIDevice *dev, unsigned vector)
+{
+    if (dev->msix_entry_used[vector]) {
+        return;
+    }
+    kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+    kvm_commit_irq_routes(kvm_context);
+}
+
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
@@ -109,6 +220,9 @@ static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
 static void msix_free_irq_entries(PCIDevice *dev)
 {
     int vector;
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_msix_free(dev);
+    }
 
     for (vector = 0; vector < dev->msix_entries_nr; ++vector)
         dev->msix_entry_used[vector] = 0;
@@ -181,7 +295,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
     PCIDevice *dev = opaque;
     unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
     int vector = offset / MSIX_ENTRY_SIZE;
+    int was_masked = msix_is_masked(dev, vector);
     memcpy(dev->msix_table_page + offset, &val, 4);
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
+    }
     if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
         msix_clr_pending(dev, vector);
         msix_notify(dev, vector);
@@ -234,6 +352,12 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
     if (nentries > MSIX_MAX_ENTRIES)
         return -EINVAL;
 
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        /* Zeroed, so that bytes kvm_msix_routing_entry() does not write are
+         * well defined when entries are compared or handed to KVM. */
+        dev->msix_irq_entries = qemu_mallocz(nentries *
+                                             sizeof *dev->msix_irq_entries);
+    }
     dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
                                         sizeof *dev->msix_entry_used);
 
@@ -278,6 +402,8 @@ int msix_uninit(PCIDevice *dev)
     dev->msix_table_page = NULL;
     qemu_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
+    qemu_free(dev->msix_irq_entries);
+    dev->msix_irq_entries = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
     return 0;
 }
@@ -340,6 +466,10 @@ void msix_notify(PCIDevice *dev, unsigned vector)
         msix_set_pending(dev, vector);
         return;
     }
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL);
+        return;
+    }
 
     address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
     address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
@@ -369,13 +499,27 @@ int msix_vector_use(PCIDevice *dev, unsigned vector)
 {
     if (vector >= dev->msix_entries_nr)
         return -EINVAL;
-    dev->msix_entry_used[vector]++;
+    if (dev->msix_entry_used[vector]++)
+        return 0;
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        int r = kvm_msix_add(dev, vector);
+        if (r < 0) {
+            /* No route was installed: undo the use count so that a later
+             * call retries instead of reporting success. */
+            --dev->msix_entry_used[vector];
+            return r;
+        }
+    }
     return 0;
 }
 
 /* Mark vector as unused. */
 void msix_vector_unuse(PCIDevice *dev, unsigned vector)
 {
-    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector]) {
         --dev->msix_entry_used[vector];
+        if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+            kvm_msix_del(dev, vector);
+        }
+    }
 }
diff --git a/hw/pci.h b/hw/pci.h
index 7ae9c93..66235e4 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -5,6 +5,8 @@
 
 #include "qdev.h"
 
+struct kvm_irq_routing_entry;
+
 /* PCI includes legacy ISA access. */
 #include "isa.h"
 
@@ -232,6 +234,7 @@ struct PCIDevice {
     unsigned *msix_entry_used;
     /* Region including the MSI-X table */
     uint32_t msix_bar_size;
+    struct kvm_irq_routing_entry *msix_irq_entries;
 
     /* Device capability configuration space */
     struct {
diff --git a/libkvm-all.h b/libkvm-all.h
index ecd3065..4a98bcb 100644
--- a/libkvm-all.h
+++ b/libkvm-all.h
@@ -898,6 +898,24 @@ int kvm_del_routing_entry(kvm_context_t kvm,
                           struct kvm_irq_routing_entry* entry);
 
 /*!
+ * \brief Updates a routing in the temporary irq routing table
+ *
+ * Update a routing in the temporary irq routing table
+ * with a new value. entry type and GSI can not be changed.
+ * Nothing is committed to the running VM.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param entry Entry to replace; located by type, GSI and contents
+ * \param newentry New contents for the located entry
+ * \return 0 on success, -EINVAL if type or GSI differ, -ESRCH if no entry
+ *         matches, -ENOSYS without KVM_CAP_IRQ_ROUTING
+ */
+int kvm_update_routing_entry(kvm_context_t kvm,
+                             struct kvm_irq_routing_entry* entry,
+                             struct kvm_irq_routing_entry* newentry
+);
+
+/*!
  * \brief Commit the temporary irq routing table
  *
  * Commit the temporary irq routing table to the running VM.
diff --git a/qemu-kvm.c b/qemu-kvm.c
index c5cd038..8d226be 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1448,6 +1448,50 @@ int kvm_del_routing_entry(kvm_context_t kvm,
 #endif
 }
 
+int kvm_update_routing_entry(kvm_context_t kvm,
+                             struct kvm_irq_routing_entry* entry,
+                             struct kvm_irq_routing_entry* newentry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing_entry *e;
+    int i;
+
+    if (entry->gsi != newentry->gsi ||
+        entry->type != newentry->type) {
+        return -EINVAL; /* type and GSI of a route can not be changed */
+    }
+
+    for (i = 0; i < kvm->irq_routes->nr; ++i) {
+        e = &kvm->irq_routes->entries[i];
+        if (e->type != entry->type || e->gsi != entry->gsi) {
+            continue;
+        }
+        switch (e->type) {
+        case KVM_IRQ_ROUTING_IRQCHIP:
+            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
+                e->u.irqchip.pin == entry->u.irqchip.pin) {
+                e->u.irqchip = newentry->u.irqchip; /* replace in place */
+                return 0;
+            }
+            break;
+        case KVM_IRQ_ROUTING_MSI:
+            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
+                e->u.msi.address_hi == entry->u.msi.address_hi &&
+                e->u.msi.data == entry->u.msi.data) {
+                e->u.msi = newentry->u.msi; /* replace in place */
+                return 0;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    return -ESRCH; /* no entry with the contents of 'entry' was found */
+#else
+    return -ENOSYS; /* built without KVM_CAP_IRQ_ROUTING */
+#endif
+}
+
 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
-- 
1.6.2.5
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html