[RFC][PATCH 43/45] msix: Allow to customize capability on init

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This enables fully configurable MSI-X initialization: msix_init now
takes the config space offset, independent table and PBA BARs, and the
offsets inside those BARs. The table and PBA are realized as two memory
subregions, mapped either into the BAR regions passed by the caller or
into the single-page container that msix_init_simple creates and
registers.

Will be required for device assignment.

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 hw/msix.c |  245 +++++++++++++++++++++++++++++++++---------------------------
 hw/msix.h |    7 ++-
 hw/pci.h  |   12 ++-
 3 files changed, 150 insertions(+), 114 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 258b9c1..548e712 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -25,18 +25,12 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-/* How much space does an MSIX table need. */
-/* The spec requires giving the table structure
- * a 4K aligned region all by itself. */
 #define MSIX_PAGE_SIZE 0x1000
-/* Reserve second half of the page for pending bits */
-#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
-#define MSIX_MAX_ENTRIES 32
 
 static void msix_message_from_vector(PCIDevice *dev, unsigned vector,
                                      MSIMessage *msg)
 {
-    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+    uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
 
     msg->address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
     msg->data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
@@ -54,67 +48,6 @@ static void kvm_msix_free(PCIDevice *dev)
     }
 }
 
-/* Add MSI-X capability to the config space for the device. */
-/* Given a bar and its size, add MSI-X table on top of it
- * and fill MSI-X capability in the config space.
- * Original bar size must be a power of 2 or 0.
- * New bar size is returned. */
-static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
-                           unsigned bar_nr, unsigned bar_size)
-{
-    int config_offset;
-    uint32_t new_size;
-    uint8_t *config;
-
-    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
-        return -EINVAL;
-    }
-    if (bar_size > 0x80000000) {
-        return -ENOSPC;
-    }
-
-    /* Add space for MSI-X structures */
-    if (!bar_size) {
-        new_size = MSIX_PAGE_SIZE;
-    } else if (bar_size < MSIX_PAGE_SIZE) {
-        bar_size = MSIX_PAGE_SIZE;
-        new_size = MSIX_PAGE_SIZE * 2;
-    } else {
-        new_size = bar_size * 2;
-    }
-
-    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, 0,
-                                       MSIX_CAP_LENGTH);
-    if (config_offset < 0) {
-        return config_offset;
-    }
-    pdev->msix_cap = config_offset;
-
-    config = pdev->config + config_offset;
-    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
-    /* Table on top of BAR */
-    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
-    /* Pending bits on top of that */
-    pci_set_long(config + PCI_MSIX_PBA,
-                 (bar_size + MSIX_PAGE_PENDING) | bar_nr);
-
-    /* Make flags bit writable. */
-    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |=
-        MSIX_ENABLE_MASK | MSIX_MASKALL_MASK;
-
-    return 0;
-}
-
-static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
-                               unsigned size)
-{
-    PCIDevice *dev = opaque;
-    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
-    void *page = dev->msix_table_page;
-
-    return pci_get_long(page + offset);
-}
-
 static uint8_t msix_pending_mask(int vector)
 {
     return 1 << (vector % 8);
@@ -122,7 +55,7 @@ static uint8_t msix_pending_mask(int vector)
 
 static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
 {
-    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+    return dev->msix_pba + vector / 8;
 }
 
 static int msix_is_pending(PCIDevice *dev, int vector)
@@ -150,7 +83,7 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
     unsigned offset =
         vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
     return msix_function_masked(dev) ||
-	   dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
+        dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
 static void msix_fire_vector_config_notifier(PCIDevice *dev,
@@ -213,18 +146,25 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
     }
 }
 
-static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
-                            uint64_t val, unsigned size)
+static uint64_t msix_table_read(void *opaque, target_phys_addr_t addr,
+                                unsigned size)
+{
+    PCIDevice *dev = opaque;
+
+    return pci_get_long(dev->msix_table + addr);
+}
+
+static void msix_table_write(void *opaque, target_phys_addr_t addr,
+                             uint64_t val, unsigned size)
 {
     PCIDevice *dev = opaque;
-    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
-    unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE;
+    unsigned int vector = addr / PCI_MSIX_ENTRY_SIZE;
     bool was_masked = msix_is_masked(dev, vector);
     bool is_masked;
 
-    pci_set_long(dev->msix_table_page + offset, val);
+    pci_set_long(dev->msix_table + addr, val);
 
-    if (msix_enabled(dev) && vector < dev->msix_entries_nr) {
+    if (msix_enabled(dev)) {
         is_masked = msix_is_masked(dev, vector);
         if (was_masked != is_masked) {
             msix_handle_mask_update(dev, vector);
@@ -234,9 +174,35 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
     }
 }
 
-static const MemoryRegionOps msix_mmio_ops = {
-    .read = msix_mmio_read,
-    .write = msix_mmio_write,
+static const MemoryRegionOps msix_table_ops = {
+    .read = msix_table_read,
+    .write = msix_table_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t msix_pba_read(void *opaque, target_phys_addr_t addr,
+                              unsigned size)
+{
+    PCIDevice *dev = opaque;
+
+    return pci_get_long(dev->msix_pba + addr);
+}
+
+static void msix_pba_write(void *opaque, target_phys_addr_t addr,
+                           uint64_t val, unsigned size)
+{
+    PCIDevice *dev = opaque;
+
+    pci_set_long(dev->msix_pba + addr, val);
+}
+
+static const MemoryRegionOps msix_pba_ops = {
+    .read = msix_pba_read,
+    .write = msix_pba_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid = {
         .min_access_size = 4,
@@ -253,7 +219,7 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
         bool was_masked = msix_is_masked(dev, vector);
 
-        dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
 
         if (!was_masked) {
             msix_handle_mask_update(dev, vector);
@@ -261,10 +227,16 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
     }
 }
 
-/* Initialize the MSI-X structures in a single dedicated BAR
- * and register it. */
-int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr)
+/* Initialize the MSI-X structures with all degrees of freedom. The caller is
+ * responsible for providing the BAR regions and registering them. */
+int msix_init(PCIDevice *dev, uint8_t config_offset, unsigned int nentries,
+              MemoryRegion *table_bar, unsigned int table_bar_nr,
+              pcibus_t table_offset, MemoryRegion *pba_bar,
+              unsigned int pba_bar_nr, pcibus_t pba_offset)
 {
+    pcibus_t table_size;
+    pcibus_t pba_size;
+    uint8_t *config;
     int ret;
 
     /* Nothing to do if MSI is not supported by interrupt controller */
@@ -273,38 +245,86 @@ int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr)
         return -ENOTSUP;
     }
 
-    if (nentries > MSIX_MAX_ENTRIES)
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1 ||
+        table_bar_nr > 5 || pba_bar_nr > 5) {
         return -EINVAL;
+    }
+
+    table_size = nentries * PCI_MSIX_ENTRY_SIZE;
+    /* Round up to a multiple of 16 bytes (128 pending bits) as we cannot
+     * create smaller memory regions; keeps 4-byte PBA accesses in bounds. */
+    pba_size = ((nentries + 127) / 128) * 16;
+
+    if (table_bar_nr == pba_bar_nr &&
+        ranges_overlap(table_offset, table_size, pba_offset, pba_size)) {
+        return -EINVAL;
+    }
 
-    dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
+    ret = pci_add_capability(dev, PCI_CAP_ID_MSIX, config_offset,
+                             MSIX_CAP_LENGTH);
+    if (ret < 0) {
+        return ret;
+    }
+    config_offset = ret;
+
+    dev->msix_table = g_malloc0(table_size);
+    dev->msix_pba = g_malloc0(pba_size);
     msix_mask_all(dev, nentries);
 
-    memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev,
-                          "msix", MSIX_PAGE_SIZE);
+    memory_region_init_io(&dev->msix_table_mem, &msix_table_ops, dev,
+                          "msix-table", table_size);
+    memory_region_add_subregion_overlap(table_bar, table_offset,
+                                        &dev->msix_table_mem, 1);
 
-    dev->msix_entries_nr = nentries;
-    ret = msix_add_config(dev, nentries, bar_nr, 0);
-    if (ret)
-        goto err_config;
+    memory_region_init_io(&dev->msix_pba_mem, &msix_pba_ops, dev,
+                          "msix-pba", pba_size);
+    memory_region_add_subregion_overlap(pba_bar, pba_offset,
+                                        &dev->msix_pba_mem, 1);
+
+    config = dev->config + config_offset;
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
+    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
+
+    /* Make flags bit writable. */
+    dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |=
+        MSIX_ENABLE_MASK | MSIX_MASKALL_MASK;
 
     dev->msix_cache = g_malloc0(nentries * sizeof *dev->msix_cache);
 
+    dev->msix_cap = config_offset;
+    dev->msix_entries_nr = nentries;
     dev->cap_present |= QEMU_PCI_CAP_MSIX;
 
-    pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
-                     &dev->msix_mmio);
     return 0;
+}
 
-err_config:
-    dev->msix_entries_nr = 0;
-    memory_region_destroy(&dev->msix_mmio);
-    g_free(dev->msix_table_page);
-    dev->msix_table_page = NULL;
-    return ret;
+/* Initialize the MSI-X structures in a single dedicated BAR
+ * and register it. */
+int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr)
+{
+    int ret;
+
+    assert(nentries * PCI_MSIX_ENTRY_SIZE <= MSIX_PAGE_SIZE / 2);
+
+    memory_region_init(&dev->msix_simple_container, "msix-container",
+                       MSIX_PAGE_SIZE);
+
+    ret = msix_init(dev, 0, nentries, &dev->msix_simple_container, bar_nr, 0,
+                    &dev->msix_simple_container, bar_nr, MSIX_PAGE_SIZE / 2);
+    if (ret < 0) {
+        memory_region_destroy(&dev->msix_simple_container);
+        return ret;
+    }
+
+    pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     &dev->msix_simple_container);
+    return 0;
 }
 
 /* Clean up resources for the device. */
-void msix_uninit(PCIDevice *dev, MemoryRegion *bar)
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
+                 MemoryRegion *pba_bar)
 {
     if (!msix_present(dev)) {
         return;
@@ -312,10 +332,14 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *bar)
     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
     dev->msix_cap = 0;
     dev->msix_entries_nr = 0;
-    memory_region_del_subregion(bar, &dev->msix_mmio);
-    memory_region_destroy(&dev->msix_mmio);
-    g_free(dev->msix_table_page);
-    dev->msix_table_page = NULL;
+    memory_region_del_subregion(pba_bar, &dev->msix_pba_mem);
+    memory_region_destroy(&dev->msix_pba_mem);
+    memory_region_del_subregion(table_bar, &dev->msix_table_mem);
+    memory_region_destroy(&dev->msix_table_mem);
+    g_free(dev->msix_table);
+    dev->msix_table = NULL;
+    g_free(dev->msix_pba);
+    dev->msix_pba = NULL;
 
     kvm_msix_free(dev);
     g_free(dev->msix_cache);
@@ -325,7 +349,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *bar)
 
 void msix_uninit_simple(PCIDevice *dev)
 {
-    msix_uninit(dev, &dev->msix_mmio);
+    msix_uninit(dev, &dev->msix_simple_container, &dev->msix_simple_container);
 }
 
 void msix_save(PCIDevice *dev, QEMUFile *f)
@@ -335,8 +359,8 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
     if (!msix_present(dev)) {
         return;
     }
-    qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
-    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+    qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8);
 }
 
 /* Should be called after restoring the config space. */
@@ -348,8 +372,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
         return;
     }
 
-    qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
-    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+    qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
 }
 
 /* Does device support MSI-X? */
@@ -391,7 +415,8 @@ void msix_reset(PCIDevice *dev)
     msix_clear_all_vectors(dev);
     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
 	    ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
-    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
+    memset(dev->msix_pba, 0, (dev->msix_entries_nr + 7) / 8);
     msix_mask_all(dev, dev->msix_entries_nr);
 }
 
diff --git a/hw/msix.h b/hw/msix.h
index 56e7ba5..040b552 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -4,12 +4,17 @@
 #include "qemu-common.h"
 #include "pci.h"
 
+int msix_init(PCIDevice *pdev, uint8_t config_offset, unsigned int nentries,
+              MemoryRegion *table_bar, unsigned int table_bar_nr,
+              pcibus_t table_offset, MemoryRegion *pba_bar,
+              unsigned int pba_bar_nr, pcibus_t pba_offset);
 int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr);
 
 void msix_write_config(PCIDevice *pci_dev, uint32_t address,
                        uint32_t old_val, int len);
 
-void msix_uninit(PCIDevice *d, MemoryRegion *bar);
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
+                 MemoryRegion *pba_bar);
 void msix_uninit_simple(PCIDevice *d);
 
 void msix_save(PCIDevice *dev, QEMUFile *f);
diff --git a/hw/pci.h b/hw/pci.h
index e2be271..4b90f5c 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -175,9 +175,15 @@ struct PCIDevice {
     int msix_entries_nr;
 
     /* Space to store MSIX table */
-    uint8_t *msix_table_page;
-    /* MMIO index used to map MSIX table and pending bit entries. */
-    MemoryRegion msix_mmio;
+    uint8_t *msix_table;
+    /* Space to store MSIX PBA */
+    uint8_t *msix_pba;
+    /* single-page MSI-X MMIO container. */
+    MemoryRegion msix_simple_container;
+    /* Used to map MSIX table. */
+    MemoryRegion msix_table_mem;
+    /* Used to map PBA. */
+    MemoryRegion msix_pba_mem;
     /* Version id needed for VMState */
     int32_t version_id;
 
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux