[PATCH] qemu-kvm: device assignment: Enabling MSI-X according to mask bit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The old MSI-X enabling method assume the entries are written before MSI-X
enabled, but some OS didn't obey this, e.g. FreeBSD. This patch would fix
this.

Also, according to the PCI spec, mask bit of MSI-X table should be set
after reset.

Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
---
 hw/device-assignment.c |  188 +++++++++++++++++++++++++++++++++++++++++-------
 hw/device-assignment.h |    2 +-
 2 files changed, 162 insertions(+), 28 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 832c236..ed0b491 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1111,15 +1111,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 #endif
 
 #ifdef KVM_CAP_DEVICE_MSIX
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+
+#define PCI_MSIX_CTRL_MASKBIT	1ul
+static int get_msix_entries_max_nr(AssignedDevice *adev)
 {
-    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t entries_nr = 0, entries_max_nr;
-    int pos = 0, i, r = 0;
-    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
-    struct kvm_assigned_msix_nr msix_nr;
-    struct kvm_assigned_msix_entry msix_entry;
-    void *va = adev->msix_table_page;
+    int pos, entries_max_nr;
+    PCIDevice *pci_dev = &adev->dev;
 
     pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
@@ -1127,20 +1124,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     entries_max_nr &= PCI_MSIX_TABSIZE;
     entries_max_nr += 1;
 
+    return entries_max_nr;
+}
+
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    uint32_t msg_ctrl;
+    void *va = adev->msix_table_page;
+
+    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
+static int get_msix_valid_entries_nr(AssignedDevice *adev,
+				     uint16_t entries_max_nr)
+{
+    void *va = adev->msix_table_page;
+    uint32_t msg_ctrl;
+    uint16_t entries_nr = 0;
+    int i;
+
     /* Get the usable entry number for allocating */
     for (i = 0; i < entries_max_nr; i++) {
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
         /* Ignore unused entry even it's unmasked */
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
         entries_nr ++;
     }
+    return entries_nr;
+}
+
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
+                                         uint16_t entries_nr,
+                                         uint16_t entries_max_nr)
+{
+    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+    int i, r = 0;
+    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+    struct kvm_assigned_msix_nr msix_nr;
+    struct kvm_assigned_msix_entry msix_entry;
+    void *va = adev->msix_table_page;
 
-    if (entries_nr == 0) {
-        fprintf(stderr, "MSI-X entry number is zero!\n");
-        return -EINVAL;
-    }
     msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
@@ -1152,6 +1177,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1165,10 +1192,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         if (entries_nr >= msix_nr.entry_nr)
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        memcpy(&msg_data, va + i * 16 + 8, 4);
-        if (msg_data == 0)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
 
+        memcpy(&msg_data, va + i * 16 + 8, 4);
         memcpy(&msg_addr, va + i * 16, 4);
         memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
 
@@ -1182,17 +1209,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         adev->entry[entries_nr].u.msi.address_lo = msg_addr;
         adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
         adev->entry[entries_nr].u.msi.data = msg_data;
-        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
-	kvm_add_routing_entry(&adev->entry[entries_nr]);
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+        kvm_add_routing_entry(&adev->entry[entries_nr]);
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
             break;
         }
-        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
+        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
                 msix_entry.gsi, msix_entry.entry);
         entries_nr ++;
     }
@@ -1209,20 +1237,24 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
     AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
-    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+    uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos);
     int r;
+    uint16_t entries_nr, entries_max_nr;
+    int enable_msix;
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
             calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                     (uint8_t)assigned_dev->h_devfn);
 
+    enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
+                  !(ctrl_word & PCI_MSIX_MASK));
+
     /* Some guests gratuitously disable MSIX even if they're not using it,
      * try to catch this by only deassigning irqs if the guest is using
      * MSIX or intends to start. */
     if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
-        (*ctrl_word & PCI_MSIX_ENABLE)) {
-
+            enable_msix) {
         assigned_irq_data.flags = assigned_dev->irq_requested_type;
         free_dev_irq_entries(assigned_dev);
         r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
@@ -1231,16 +1263,30 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
-    if (*ctrl_word & PCI_MSIX_ENABLE) {
-        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
-                                  KVM_DEV_IRQ_GUEST_MSIX;
-
-        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+    entries_max_nr = assigned_dev->max_msix_entries_nr;
+    if (entries_max_nr == 0) {
+        fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
+        return;
+    }
+    /*
+     * Guest may try to enable MSI-X before setting MSI-X entry done, so
+     * let's wait until guest unmask the entries.
+     */
+    entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
+    if (entries_nr == 0)
+        return;
+    if (enable_msix) {
+        if (assigned_dev_update_msix_mmio(pci_dev,
+                    entries_nr, entries_max_nr) < 0) {
             perror("assigned_dev_update_msix_mmio");
             return;
         }
+        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+                                  KVM_DEV_IRQ_GUEST_MSIX;
         if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
             perror("assigned_dev_enable_msix: assign irq");
             return;
@@ -1341,6 +1387,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 #endif
@@ -1378,10 +1425,90 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+    int ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+    struct PCIDevice *pci_dev = &adev->dev;
+    uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    entries_max_nr = adev->max_msix_entries_nr;
+
+    /*
+     * Find the index of routing entry, it can be different from 'index' if
+     * empty entry existed in between
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1408,6 +1535,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
 
 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
+    int i;
     dev->msix_table_page = mmap(NULL, 0x1000,
                                 PROT_READ|PROT_WRITE,
                                 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1417,8 +1545,12 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
         return -EFAULT;
     }
     memset(dev->msix_table_page, 0, 0x1000);
+    for (i = 0; i < 0x1000; i += 0x10)
+        *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1435,6 +1567,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index c94a730..754e5c0 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,7 +104,7 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
-    int irq_entries_nr;
+    int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
-- 
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux