This enables fully configurable MSI-X initialization: msix_init now takes the config space offset, independent table and PBA BARs, and the offsets inside them. The table and PBA are now realized as two memory subregions, either of the passed BAR regions or of the single-page container that msix_init_simple creates and registers. This will be required for device assignment. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- hw/msix.c | 245 +++++++++++++++++++++++++++++++++--------------------------- hw/msix.h | 7 ++- hw/pci.h | 12 ++- 3 files changed, 150 insertions(+), 114 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index 258b9c1..548e712 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -25,18 +25,12 @@ #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) -/* How much space does an MSIX table need. */ -/* The spec requires giving the table structure - * a 4K aligned region all by itself. */ #define MSIX_PAGE_SIZE 0x1000 -/* Reserve second half of the page for pending bits */ -#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) -#define MSIX_MAX_ENTRIES 32 static void msix_message_from_vector(PCIDevice *dev, unsigned vector, MSIMessage *msg) { - uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; + uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; msg->address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); msg->data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); @@ -54,67 +48,6 @@ static void kvm_msix_free(PCIDevice *dev) } } -/* Add MSI-X capability to the config space for the device. */ -/* Given a bar and its size, add MSI-X table on top of it - * and fill MSI-X capability in the config space. - * Original bar size must be a power of 2 or 0. - * New bar size is returned. 
*/ -static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, - unsigned bar_nr, unsigned bar_size) -{ - int config_offset; - uint32_t new_size; - uint8_t *config; - - if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) { - return -EINVAL; - } - if (bar_size > 0x80000000) { - return -ENOSPC; - } - - /* Add space for MSI-X structures */ - if (!bar_size) { - new_size = MSIX_PAGE_SIZE; - } else if (bar_size < MSIX_PAGE_SIZE) { - bar_size = MSIX_PAGE_SIZE; - new_size = MSIX_PAGE_SIZE * 2; - } else { - new_size = bar_size * 2; - } - - config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, 0, - MSIX_CAP_LENGTH); - if (config_offset < 0) { - return config_offset; - } - pdev->msix_cap = config_offset; - - config = pdev->config + config_offset; - pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); - /* Table on top of BAR */ - pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr); - /* Pending bits on top of that */ - pci_set_long(config + PCI_MSIX_PBA, - (bar_size + MSIX_PAGE_PENDING) | bar_nr); - - /* Make flags bit writable. 
*/ - pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= - MSIX_ENABLE_MASK | MSIX_MASKALL_MASK; - - return 0; -} - -static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr, - unsigned size) -{ - PCIDevice *dev = opaque; - unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; - void *page = dev->msix_table_page; - - return pci_get_long(page + offset); -} - static uint8_t msix_pending_mask(int vector) { return 1 << (vector % 8); @@ -122,7 +55,7 @@ static uint8_t msix_pending_mask(int vector) static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) { - return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8; + return dev->msix_pba + vector / 8; } static int msix_is_pending(PCIDevice *dev, int vector) @@ -150,7 +83,7 @@ static bool msix_is_masked(PCIDevice *dev, int vector) unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; return msix_function_masked(dev) || - dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; + dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; } static void msix_fire_vector_config_notifier(PCIDevice *dev, @@ -213,18 +146,25 @@ void msix_write_config(PCIDevice *dev, uint32_t addr, } } -static void msix_mmio_write(void *opaque, target_phys_addr_t addr, - uint64_t val, unsigned size) +static uint64_t msix_table_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + PCIDevice *dev = opaque; + + return pci_get_long(dev->msix_table + addr); +} + +static void msix_table_write(void *opaque, target_phys_addr_t addr, + uint64_t val, unsigned size) { PCIDevice *dev = opaque; - unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; - unsigned int vector = offset / PCI_MSIX_ENTRY_SIZE; + unsigned int vector = addr / PCI_MSIX_ENTRY_SIZE; bool was_masked = msix_is_masked(dev, vector); bool is_masked; - pci_set_long(dev->msix_table_page + offset, val); + pci_set_long(dev->msix_table + addr, val); - if (msix_enabled(dev) && vector < dev->msix_entries_nr) { + if (msix_enabled(dev)) { 
is_masked = msix_is_masked(dev, vector); if (was_masked != is_masked) { msix_handle_mask_update(dev, vector); @@ -234,9 +174,35 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr, } } -static const MemoryRegionOps msix_mmio_ops = { - .read = msix_mmio_read, - .write = msix_mmio_write, +static const MemoryRegionOps msix_table_ops = { + .read = msix_table_read, + .write = msix_table_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t msix_pba_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + PCIDevice *dev = opaque; + + return pci_get_long(dev->msix_pba + addr); +} + +static void msix_pba_write(void *opaque, target_phys_addr_t addr, + uint64_t val, unsigned size) +{ + PCIDevice *dev = opaque; + + pci_set_long(dev->msix_pba + addr, val); +} + +static const MemoryRegionOps msix_pba_ops = { + .read = msix_pba_read, + .write = msix_pba_write, .endianness = DEVICE_NATIVE_ENDIAN, .valid = { .min_access_size = 4, @@ -253,7 +219,7 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; bool was_masked = msix_is_masked(dev, vector); - dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; if (!was_masked) { msix_handle_mask_update(dev, vector); @@ -261,10 +227,16 @@ static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) } } -/* Initialize the MSI-X structures in a single dedicated BAR - * and register it. */ -int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr) +/* Initialize the MSI-X structures with all degrees of freedom. The caller is + * responsible for providing the BAR regions and registering them. 
*/ +int msix_init(PCIDevice *dev, uint8_t config_offset, unsigned int nentries, + MemoryRegion *table_bar, unsigned int table_bar_nr, + pcibus_t table_offset, MemoryRegion *pba_bar, + unsigned int pba_bar_nr, pcibus_t pba_offset) { + pcibus_t table_size; + pcibus_t pba_size; + uint8_t *config; int ret; /* Nothing to do if MSI is not supported by interrupt controller */ @@ -273,38 +245,86 @@ int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr) return -ENOTSUP; } - if (nentries > MSIX_MAX_ENTRIES) + if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1 || + table_bar_nr > 5 || pba_bar_nr > 5) { return -EINVAL; + } + + table_size = nentries * PCI_MSIX_ENTRY_SIZE; + /* Round up to multiples of 16 byte as we cannot create smaller memory + * regions. */ + pba_size = (nentries + 127) / 8; + + if (table_bar_nr == pba_bar_nr && + ranges_overlap(table_offset, table_size, pba_offset, pba_size)) { + return -EINVAL; + } - dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE); + ret = pci_add_capability(dev, PCI_CAP_ID_MSIX, config_offset, + MSIX_CAP_LENGTH); + if (ret < 0) { + return ret; + } + config_offset = ret; + + dev->msix_table = g_malloc0(table_size); + dev->msix_pba = g_malloc0(pba_size); msix_mask_all(dev, nentries); - memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev, - "msix", MSIX_PAGE_SIZE); + memory_region_init_io(&dev->msix_table_mem, &msix_table_ops, dev, + "msix-table", table_size); + memory_region_add_subregion_overlap(table_bar, table_offset, + &dev->msix_table_mem, 1); - dev->msix_entries_nr = nentries; - ret = msix_add_config(dev, nentries, bar_nr, 0); - if (ret) - goto err_config; + memory_region_init_io(&dev->msix_pba_mem, &msix_pba_ops, dev, + "msix-pba", pba_size); + memory_region_add_subregion_overlap(pba_bar, pba_offset, + &dev->msix_pba_mem, 1); + + config = dev->config + config_offset; + pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); + pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr); + 
pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr); + + /* Make flags bit writable. */ + dev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= + MSIX_ENABLE_MASK | MSIX_MASKALL_MASK; dev->msix_cache = g_malloc0(nentries * sizeof *dev->msix_cache); + dev->msix_cap = config_offset; + dev->msix_entries_nr = nentries; dev->cap_present |= QEMU_PCI_CAP_MSIX; - pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, - &dev->msix_mmio); return 0; +} -err_config: - dev->msix_entries_nr = 0; - memory_region_destroy(&dev->msix_mmio); - g_free(dev->msix_table_page); - dev->msix_table_page = NULL; - return ret; +/* Initialize the MSI-X structures in a single dedicated BAR + * and register it. */ +int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr) +{ + int ret; + + assert(nentries * PCI_MSIX_ENTRY_SIZE <= MSIX_PAGE_SIZE / 2); + + memory_region_init(&dev->msix_simple_container, "msix-container", + MSIX_PAGE_SIZE); + + ret = msix_init(dev, 0, nentries, &dev->msix_simple_container, bar_nr, 0, + &dev->msix_simple_container, bar_nr, MSIX_PAGE_SIZE / 2); + if (ret < 0) { + memory_region_destroy(&dev->msix_simple_container); + return ret; + } + + pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, + &dev->msix_simple_container); + return 0; } /* Clean up resources for the device. 
*/ -void msix_uninit(PCIDevice *dev, MemoryRegion *bar) +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, + MemoryRegion *pba_bar) { if (!msix_present(dev)) { return; @@ -312,10 +332,14 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *bar) pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); dev->msix_cap = 0; dev->msix_entries_nr = 0; - memory_region_del_subregion(bar, &dev->msix_mmio); - memory_region_destroy(&dev->msix_mmio); - g_free(dev->msix_table_page); - dev->msix_table_page = NULL; + memory_region_del_subregion(pba_bar, &dev->msix_pba_mem); + memory_region_destroy(&dev->msix_pba_mem); + memory_region_del_subregion(table_bar, &dev->msix_table_mem); + memory_region_destroy(&dev->msix_table_mem); + g_free(dev->msix_table); + dev->msix_table = NULL; + g_free(dev->msix_pba); + dev->msix_pba = NULL; kvm_msix_free(dev); g_free(dev->msix_cache); @@ -325,7 +349,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *bar) void msix_uninit_simple(PCIDevice *dev) { - msix_uninit(dev, &dev->msix_mmio); + msix_uninit(dev, &dev->msix_table_mem, &dev->msix_pba_mem); } void msix_save(PCIDevice *dev, QEMUFile *f) @@ -335,8 +359,8 @@ void msix_save(PCIDevice *dev, QEMUFile *f) if (!msix_present(dev)) { return; } - qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); - qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); + qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); + qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8); } /* Should be called after restoring the config space. */ @@ -348,8 +372,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f) return; } - qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); - qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); + qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); + qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8); } /* Does device support MSI-X? 
*/ @@ -391,7 +415,8 @@ void msix_reset(PCIDevice *dev) msix_clear_all_vectors(dev); dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; - memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); + memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); + memset(dev->msix_pba, 0, (dev->msix_entries_nr + 7) / 8); msix_mask_all(dev, dev->msix_entries_nr); } diff --git a/hw/msix.h b/hw/msix.h index 56e7ba5..040b552 100644 --- a/hw/msix.h +++ b/hw/msix.h @@ -4,12 +4,17 @@ #include "qemu-common.h" #include "pci.h" +int msix_init(PCIDevice *pdev, uint8_t config_offset, unsigned int nentries, + MemoryRegion *table_bar, unsigned int table_bar_nr, + pcibus_t table_offset, MemoryRegion *pba_bar, + unsigned int pba_bar_nr, pcibus_t pba_offset); int msix_init_simple(PCIDevice *dev, unsigned short nentries, unsigned bar_nr); void msix_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t old_val, int len); -void msix_uninit(PCIDevice *d, MemoryRegion *bar); +void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, + MemoryRegion *pba_bar); void msix_uninit_simple(PCIDevice *d); void msix_save(PCIDevice *dev, QEMUFile *f); diff --git a/hw/pci.h b/hw/pci.h index e2be271..4b90f5c 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -175,9 +175,15 @@ struct PCIDevice { int msix_entries_nr; /* Space to store MSIX table */ - uint8_t *msix_table_page; - /* MMIO index used to map MSIX table and pending bit entries. */ - MemoryRegion msix_mmio; + uint8_t *msix_table; + /* Space to store MSIX PBA */ + uint8_t *msix_pba; + /* single-page MSI-X MMIO container. */ + MemoryRegion msix_simple_container; + /* Used to map MSIX table. */ + MemoryRegion msix_table_mem; + /* Used to map PBA. 
*/ + MemoryRegion msix_pba_mem; /* Version id needed for VMState */ int32_t version_id; -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html