[PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
---
 hw/device-assignment.c |  143 +++++++++++++++++++++++++++++++++++++++++++-----
 hw/device-assignment.h |    7 ++-
 qemu-kvm.c             |   36 ++++++++++++
 qemu-kvm.h             |   11 ++++
 4 files changed, 180 insertions(+), 17 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index ed0b491..0aec1f4 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -67,6 +67,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
                                                  uint32_t address,
                                                  uint32_t val, int len);
 
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{   /* Pack the host PCI address: seg in bits 31..16, bus in 15..8, devfn in 7..0. */
+    return ((uint32_t)seg << 16) | ((uint32_t)bus << 8) | devfn;
+}
+
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -269,6 +274,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     int ret = 0;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+    struct kvm_msix_mmio_user msix_mmio;
+#endif
 
     DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
           e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -287,6 +296,45 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 
             cpu_register_physical_memory(e_phys + offset,
                     TARGET_PAGE_SIZE, r_dev->mmio_index);
+#ifdef KVM_CAP_MSIX_MMIO
+            if (cap_mask) {
+                r_dev->guest_msix_table_addr = e_phys + offset;
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+				KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.base_va = (unsigned long)r_dev->msix_table_page;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                msix_mmio.flags = 0;
+                ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+                if (ret)
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+            }
+#endif
+        if (real_region->base_addr <= r_dev->msix_pba_addr &&
+                real_region->base_addr + real_region->size >=
+                r_dev->msix_pba_addr) {
+#ifdef KVM_CAP_MSIX_MMIO
+            int offset = r_dev->msix_pba_addr - real_region->base_addr;
+            if (cap_mask) {
+                r_dev->guest_msix_pba_addr = e_phys + offset;
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+				KVM_MSIX_MMIO_TYPE_BASE_PBA;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.base_va = (unsigned long)r_dev->msix_pba_page;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                msix_mmio.flags = 0;
+                ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+                if (ret)
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+            }
+#endif
+         }
         }
     }
 
@@ -822,11 +870,6 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
-    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
 static void assign_failed_examine(AssignedDevice *dev)
 {
     char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1233,6 +1276,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
     return r;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr);
+
 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
@@ -1368,8 +1413,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
 #ifdef KVM_CAP_DEVICE_MSIX
     /* Expose MSI-X capability */
     if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX))) {
-        int bar_nr;
-        uint32_t msix_table_entry;
+        int table_bar_nr, pba_bar_nr;
+        uint32_t msix_table_entry, msix_pba_entry;
 
         dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
         pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos,
@@ -1384,9 +1429,17 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
                      PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
 
         msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
-        bar_nr = msix_table_entry & PCI_MSIX_BIR;
+        table_bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
-        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->msix_table_addr = pci_region[table_bar_nr].base_addr +
+                               msix_table_entry;
+
+        msix_pba_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_PBA);
+        pba_bar_nr = msix_pba_entry & PCI_MSIX_BIR;
+        msix_pba_entry &= ~PCI_MSIX_BIR;
+        dev->msix_pba_addr = pci_region[pba_bar_nr].base_addr +
+                               msix_pba_entry;
+
         dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
@@ -1419,8 +1472,7 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
             (8 * (addr & 3))) & 0xffff;
 }
 
-static void msix_mmio_writel(void *opaque,
-                             target_phys_addr_t addr, uint32_t val)
+static void assigned_dev_update_routing(void *opaque, unsigned long addr)
 {
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
@@ -1432,10 +1484,6 @@ static void msix_mmio_writel(void *opaque,
     struct PCIDevice *pci_dev = &adev->dev;
     uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
-    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
-		    addr, val);
-    memcpy((void *)((char *)page + offset), &val, 4);
-
     index = offset / 16;
 
     /* Check if mask bit is being accessed */
@@ -1511,6 +1559,41 @@ static void msix_mmio_writel(void *opaque,
     adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr)
+{   /* Routing-update callback: 0 if addr is in this device's table, else -EINVAL. */
+    AssignedDevice *dev = opaque;
+    /* The guest-visible table covers max_msix_entries_nr entries of 16 bytes. */
+    if (addr < dev->guest_msix_table_addr ||
+            addr >= dev->guest_msix_table_addr + dev->max_msix_entries_nr * 16) {
+        return -EINVAL; /* outside this device's MSI-X table */
+    }
+    assigned_dev_update_routing(opaque, addr);
+    return 0;
+}
+
+static void msix_mmio_writel(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{   /* Userspace handler for a 32-bit guest write to the MSI-X table page. */
+    AssignedDevice *adev = opaque;
+    unsigned int off = addr & 0xfff; /* byte offset inside the table page */
+    int in_kernel = 0; /* result of the KVM_CAP_MSIX_MMIO check; 0 when not built in */
+
+#ifdef KVM_CAP_MSIX_MMIO
+    in_kernel = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+#endif
+
+    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
+            addr, val);
+    if (in_kernel) {
+        /* With the capability present this path is unexpected: warn only. */
+        fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n");
+    } else {
+        memcpy((char *)adev->msix_table_page + off, &val, 4); /* store val */
+    }
+
+    assigned_dev_update_routing(opaque, addr);
+}
+
 static void msix_mmio_writew(void *opaque,
                              target_phys_addr_t addr, uint32_t val)
 {
@@ -1547,11 +1630,32 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
     memset(dev->msix_table_page, 0, 0x1000);
     for (i = 0; i < 0x1000; i += 0x10)
         *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
+    dev->msix_pba_page = mmap(NULL, 0x1000,
+                                PROT_READ|PROT_WRITE,
+                                MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+    if (dev->msix_pba_page == MAP_FAILED) {
+        fprintf(stderr, "fail allocate msix_pba_page! %s\n",
+                strerror(errno));
+	goto out;
+    }
+    memset(dev->msix_pba_page, 0, 0x1000);
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
     dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
                                         sizeof *dev->dev.msix_entry_used);
+    dev->routing_updater_entry =
+        kvm_add_routing_updater(assigned_dev_update_routing_handler, dev);
+    if (!dev->routing_updater_entry) {
+        perror("kvm_add_routing_updater");
+	goto out2;
+    }
     return 0;
+out2:
+    free(dev->dev.msix_entry_used);
+    munmap(dev->msix_pba_page, 0x1000);
+out:
+    munmap(dev->msix_table_page, 0x1000);
+    return -EFAULT;
 }
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
@@ -1567,6 +1671,15 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    if (munmap(dev->msix_pba_page, 0x1000) == -1) {
+        fprintf(stderr, "error unmapping msix_pba_page! %s\n",
+                strerror(errno));
+    }
+    if (dev->routing_updater_entry) {
+        kvm_del_routing_updater(dev->routing_updater_entry);
+        dev->routing_updater_entry = NULL;
+    }
+    dev->msix_pba_page = NULL;
     free(dev->dev.msix_entry_used);
     dev->dev.msix_entry_used = NULL;
 }
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 754e5c0..9288753 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -32,6 +32,7 @@
 #include "qemu-common.h"
 #include "qemu-queue.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* From include/linux/pci.h in the kernel sources */
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -106,11 +107,13 @@ typedef struct AssignedDevice {
     } cap;
     int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
-    void *msix_table_page;
-    target_phys_addr_t msix_table_addr;
+    void *msix_table_page, *msix_pba_page;
+    target_phys_addr_t msix_table_addr, msix_pba_addr;
+    target_phys_addr_t guest_msix_table_addr, guest_msix_pba_addr;
     int mmio_index;
     int need_emulate_cmd;
     char *configfd_name;
+    KVMRoutingUpdateEntry *routing_updater_entry;
     QLIST_ENTRY(AssignedDevice) next;
 } AssignedDevice;
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 956b62a..bee398c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -511,6 +511,38 @@ static int handle_mmio(CPUState *env)
     return 0;
 }
 
+static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head;
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque)
+{
+    /* Register cb/opaque as a routing-update handler. The returned entry is
+     * the handle that kvm_del_routing_updater() later unlinks and frees. */
+    KVMRoutingUpdateEntry *entry = qemu_mallocz(sizeof *entry);
+
+    entry->opaque = opaque;
+    entry->cb = cb;
+    QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, entry, entries);
+    return entry;
+}
+
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e)
+{
+    QLIST_REMOVE(e, entries); /* unlink e from the list it was inserted into */
+    qemu_free(e);             /* e is released here; callers must drop their pointer */
+}
+
+static void kvm_update_msix_routing(CPUState *env)
+{   /* Offer the exit's MMIO address to each registered routing-update handler. */
+    unsigned long addr = env->kvm_run->mmio.phys_addr;
+    KVMRoutingUpdateEntry *entry;
+    /* NOTE(review): phys_addr may be wider than unsigned long on 32-bit hosts. */
+    QLIST_FOREACH(entry, &kvm_routing_update_entry_head, entries) {
+        if (entry->cb(entry->opaque, addr) == 0)
+            return; /* the first handler that returns 0 ends the search */
+    }
+    fprintf(stderr, "unhandled MSI-X routing update addr: 0x%lx\n", addr);
+}
+
 int handle_io_window(kvm_context_t kvm)
 {
     return 1;
@@ -647,6 +679,10 @@ int kvm_run(CPUState *env)
             kvm_handle_internal_error(env, run);
             r = 1;
 	    break;
+	case KVM_EXIT_MSIX_ROUTING_UPDATE:
+            kvm_update_msix_routing(env);
+            r = 1;
+            break;
         default:
             if (kvm_arch_run(env)) {
                 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 86799e6..21a3274 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -772,6 +772,17 @@ int kvm_tpr_enable_vapic(CPUState *env);
 unsigned long kvm_get_thread_id(void);
 int kvm_cpu_is_stopped(CPUState *env);
 
+typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry;
+typedef int KVMRoutingUpdateHandler(void *opaque, unsigned long addr); /* returns 0 when addr was handled */
+
+struct kvm_routing_update_entry {
+    KVMRoutingUpdateHandler *cb; /* called with opaque and the MMIO address of the exit */
+    void *opaque;                /* passed back to cb unchanged */
+    QLIST_ENTRY (kvm_routing_update_entry) entries; /* linkage in the handler list */
+};
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque); /* allocates and links an entry */
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e); /* unlinks and frees e */
 #endif
 
 #endif
-- 
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux