[PATCH 3/3] qemu-kvm: device assignment: emulate MSI-X mask bits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch emulates the MSI-X per-vector mask bit on assigned devices.

Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
---
 hw/device-assignment.c |  161 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 155 insertions(+), 6 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 8a98876..639aa0b 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -62,6 +62,11 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
 
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -264,6 +269,9 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     int ret = 0;
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+    struct kvm_assigned_msix_mmio msix_mmio;
+#endif
 
     DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
           e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -282,6 +290,16 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 
             cpu_register_physical_memory(e_phys + offset,
                     TARGET_PAGE_SIZE, r_dev->mmio_index);
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+	    memset(&msix_mmio, 0, sizeof(struct kvm_assigned_msix_mmio));
+	    msix_mmio.assigned_dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+			    r_dev->h_busnr, r_dev->h_devfn);
+	    msix_mmio.base_addr = e_phys + offset;
+            /* We require kernel MSI-X support */
+	    ret = kvm_assign_reg_msix_mmio(kvm_context, &msix_mmio);
+	    if (ret)
+                fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+#endif
         }
     }
 
@@ -824,11 +842,6 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
-    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
 static void assign_failed_examine(AssignedDevice *dev)
 {
     char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1123,6 +1136,27 @@ static int get_msix_entries_max_nr(AssignedDevice *adev)
     return entries_max_nr;
 }
 
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    struct kvm_assigned_msix_entry msix_entry;
+    int r;
+
+    memset(&msix_entry, 0, sizeof msix_entry);
+    msix_entry.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr,
+            adev->h_busnr, (uint8_t)adev->h_devfn);
+    msix_entry.entry = entry;
+    msix_entry.flags = KVM_MSIX_FLAG_QUERY_MASK;
+    r = kvm_assign_get_msix_entry(kvm_context, &msix_entry);
+    if (r) {
+        fprintf(stderr, "assigned_dev_msix_entry_masked: "
+			"Fail to get mask bit of entry %d\n", entry);
+        return 1;
+    }
+    return (msix_entry.flags & KVM_MSIX_FLAG_MASK);
+}
+#endif
+
 static int get_msix_valid_entries_nr(AssignedDevice *adev,
 				     uint16_t entries_max_nr)
 {
@@ -1136,7 +1170,11 @@ static int get_msix_valid_entries_nr(AssignedDevice *adev,
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
         memcpy(&msg_data, va + i * 16 + 8, 4);
         /* Ignore unused entry even it's unmasked */
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+        if (assigned_dev_msix_entry_masked(adev, i))
+#else
         if (msg_data == 0)
+#endif
             continue;
         entries_nr ++;
     }
@@ -1165,6 +1203,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1179,7 +1219,11 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
         memcpy(&msg_data, va + i * 16 + 8, 4);
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+        if (assigned_dev_msix_entry_masked(adev, i))
+#else
         if (msg_data == 0)
+#endif
             continue;
 
         memcpy(&msg_addr, va + i * 16, 4);
@@ -1200,6 +1244,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
@@ -1243,6 +1288,8 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
     entries_max_nr = get_msix_entries_max_nr(assigned_dev);
@@ -1250,10 +1297,16 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix)
         fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
         return;
     }
+    /*
+     * Guest may try to enable MSI-X before it has finished setting up the
+     * MSI-X entries, so wait until the guest unmasks the entries.
+     */
     entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
     if (entries_nr == 0) {
+#ifndef KVM_CAP_DEVICE_MSIX_MASK
         if (enable_msix)
             fprintf(stderr, "MSI-X entry number is zero!\n");
+#endif
         return;
     }
     if (enable_msix) {
@@ -1297,7 +1350,8 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t ad
         if (address <= ctrl_pos && address + len > ctrl_pos) {
             ctrl_pos--; /* control is word long */
             ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
-            assigned_dev_update_msix(pci_dev, (*ctrl_word & PCI_MSIX_ENABLE));
+            assigned_dev_update_msix(pci_dev,
+                    (*ctrl_word & PCI_MSIX_ENABLE) && !(*ctrl_word & PCI_MSIX_MASK));
 	}
         pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
     }
@@ -1395,10 +1449,101 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+    int pos, ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+#endif
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+#ifdef KVM_CAP_DEVICE_MSIX_MASK
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
+        pos = adev->dev.cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    else
+        pos = adev->dev.cap.start;
+
+    ctrl_word = *(uint16_t *)(adev->dev.config + pos + 2);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, 1);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    /*
+     * We're here only because the guest wants to modify the MSI data/addr;
+     * the kernel filters out such writes while the mask bit is unset.
+     */
+    entries_max_nr = get_msix_entries_max_nr(adev);
+
+    /*
+     * Find the index of the routing entry; it can differ from 'index' if
+     * empty entries exist in between.
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
+#endif
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1436,6 +1581,8 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
     memset(dev->msix_table_page, 0, 0x1000);
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1452,6 +1599,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static int assigned_initfn(struct PCIDevice *pci_dev)
-- 
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux