Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> --- hw/device-assignment.c | 143 +++++++++++++++++++++++++++++++++++++++++++----- hw/device-assignment.h | 7 ++- qemu-kvm.c | 36 ++++++++++++ qemu-kvm.h | 11 ++++ 4 files changed, 180 insertions(+), 17 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index ed0b491..0aec1f4 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -67,6 +67,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len); +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) +{ + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; +} + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -269,6 +274,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, AssignedDevRegion *region = &r_dev->v_addrs[region_num]; PCIRegion *real_region = &r_dev->real_device.regions[region_num]; int ret = 0; +#ifdef KVM_CAP_MSIX_MMIO + int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO); + struct kvm_msix_mmio_user msix_mmio; +#endif DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n", e_phys, region->u.r_virtbase, type, e_size, region_num); @@ -287,6 +296,45 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, cpu_register_physical_memory(e_phys + offset, TARGET_PAGE_SIZE, r_dev->mmio_index); +#ifdef KVM_CAP_MSIX_MMIO + if (cap_mask) { + r_dev->guest_msix_table_addr = e_phys + offset; + memset(&msix_mmio, 0, sizeof msix_mmio); + msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr, + r_dev->h_busnr, r_dev->h_devfn); + msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV | + KVM_MSIX_MMIO_TYPE_BASE_TABLE; + msix_mmio.base_addr = e_phys + offset; + msix_mmio.base_va = (unsigned long)r_dev->msix_table_page; + msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr; + msix_mmio.flags = 0; + ret = kvm_register_msix_mmio(kvm_context, &msix_mmio); + if (ret) + fprintf(stderr, "fail to register in-kernel msix_mmio!\n"); + } +#endif + if (real_region->base_addr <= r_dev->msix_pba_addr && + real_region->base_addr + real_region->size >= + r_dev->msix_pba_addr) { +#ifdef KVM_CAP_MSIX_MMIO + int offset = r_dev->msix_pba_addr - real_region->base_addr; + if (cap_mask) { + r_dev->guest_msix_pba_addr = e_phys + offset; + memset(&msix_mmio, 0, sizeof msix_mmio); + msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr, + r_dev->h_busnr, r_dev->h_devfn); + msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV | + KVM_MSIX_MMIO_TYPE_BASE_PBA; + msix_mmio.base_addr = e_phys + offset; + msix_mmio.base_va = (unsigned long)r_dev->msix_pba_page; + msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr; + msix_mmio.flags = 0; + ret = kvm_register_msix_mmio(kvm_context, &msix_mmio); + if (ret) + fprintf(stderr, "fail to register in-kernel msix_mmio!\n"); + } +#endif + } } } @@ -822,11 +870,6 @@ static void free_assigned_device(AssignedDevice *dev) } } -static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) -{ - return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; -} - static void assign_failed_examine(AssignedDevice *dev) { char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; @@ -1233,6 +1276,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, return r; } +static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr); + static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) { struct kvm_assigned_irq assigned_irq_data; @@ -1368,8 +1413,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #ifdef KVM_CAP_DEVICE_MSIX /* Expose MSI-X capability */ if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX))) { - int bar_nr; - uint32_t msix_table_entry; + int table_bar_nr, pba_bar_nr; + uint32_t msix_table_entry, msix_pba_entry; dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, @@ -1384,9 +1429,17 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE); - bar_nr = msix_table_entry & PCI_MSIX_BIR; + table_bar_nr = msix_table_entry & PCI_MSIX_BIR; msix_table_entry &= ~PCI_MSIX_BIR; - dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; + dev->msix_table_addr = pci_region[table_bar_nr].base_addr + + msix_table_entry; + + msix_pba_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_PBA); + pba_bar_nr = msix_pba_entry & PCI_MSIX_BIR; + msix_pba_entry &= ~PCI_MSIX_BIR; + dev->msix_pba_addr = pci_region[pba_bar_nr].base_addr + + msix_pba_entry; + dev->max_msix_entries_nr = get_msix_entries_max_nr(dev); } #endif @@ -1419,8 +1472,7 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) (8 * (addr & 3))) & 0xffff; } -static void msix_mmio_writel(void *opaque, - target_phys_addr_t addr, uint32_t val) +static void assigned_dev_update_routing(void *opaque, unsigned long addr) { AssignedDevice *adev = opaque; unsigned int offset = addr & 0xfff; @@ -1432,10 +1484,6 @@ static void msix_mmio_writel(void *opaque, struct PCIDevice *pci_dev = &adev->dev; uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", - addr, val); - memcpy((void *)((char *)page + offset), &val, 4); - index = offset / 16; /* Check if mask bit is being accessed */ @@ -1511,6 +1559,41 @@ static void msix_mmio_writel(void *opaque, adev->entry[entry_idx].u.msi.data = msg_data; } +static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr) +{ + AssignedDevice *adev = opaque; + + if (addr >= adev->guest_msix_table_addr && + addr < adev->guest_msix_table_addr + adev->max_msix_entries_nr * 16) { + assigned_dev_update_routing(opaque, addr); + return 0; + } + return -EINVAL; +} + +static void msix_mmio_writel(void *opaque, + target_phys_addr_t addr, uint32_t val) +{ + AssignedDevice *adev = opaque; + void *page = adev->msix_table_page; + unsigned int offset = addr & 0xfff; +#ifdef KVM_CAP_MSIX_MMIO + int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO); +#else + int cap_mask = 0; +#endif + + DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", + addr, val); + if (!cap_mask) { + memcpy((void *)((char *)page + offset), &val, 4); + } else { + fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n"); + } + + assigned_dev_update_routing(opaque, addr); +} + static void msix_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val) { @@ -1547,11 +1630,32 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev) memset(dev->msix_table_page, 0, 0x1000); for (i = 0; i < 0x1000; i += 0x10) *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1; + dev->msix_pba_page = mmap(NULL, 0x1000, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); + if (dev->msix_pba_page == MAP_FAILED) { + fprintf(stderr, "fail allocate msix_table_page! %s\n", + strerror(errno)); + goto out; + } + memset(dev->msix_pba_page, 0, 0x1000); dev->mmio_index = cpu_register_io_memory( msix_mmio_read, msix_mmio_write, dev); dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV * sizeof *dev->dev.msix_entry_used); + dev->routing_updater_entry = + kvm_add_routing_updater(assigned_dev_update_routing_handler, dev); + if (!dev->routing_updater_entry) { + perror("kvm_add_routing_updater"); + goto out2; + } return 0; +out2: + free(dev->dev.msix_entry_used); + munmap(dev->msix_pba_page, 0x1000); +out: + munmap(dev->msix_table_page, 0x1000); + return -EFAULT; } static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) @@ -1567,6 +1671,15 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) strerror(errno)); } dev->msix_table_page = NULL; + if (munmap(dev->msix_pba_page, 0x1000) == -1) { + fprintf(stderr, "error unmapping msix_table_page! %s\n", + strerror(errno)); + } + if (dev->routing_updater_entry) { + kvm_del_routing_updater(dev->routing_updater_entry); + dev->routing_updater_entry = NULL; + } + dev->msix_pba_page = NULL; free(dev->dev.msix_entry_used); dev->dev.msix_entry_used = NULL; } diff --git a/hw/device-assignment.h b/hw/device-assignment.h index 754e5c0..9288753 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -32,6 +32,7 @@ #include "qemu-common.h" #include "qemu-queue.h" #include "pci.h" +#include "qemu-kvm.h" /* From include/linux/pci.h in the kernel sources */ #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) @@ -106,11 +107,13 @@ typedef struct AssignedDevice { } cap; int irq_entries_nr, max_msix_entries_nr; struct kvm_irq_routing_entry *entry; - void *msix_table_page; - target_phys_addr_t msix_table_addr; + void *msix_table_page, *msix_pba_page; + target_phys_addr_t msix_table_addr, msix_pba_addr; + target_phys_addr_t guest_msix_table_addr, guest_msix_pba_addr; int mmio_index; int need_emulate_cmd; char *configfd_name; + KVMRoutingUpdateEntry *routing_updater_entry; QLIST_ENTRY(AssignedDevice) next; } AssignedDevice; diff --git a/qemu-kvm.c b/qemu-kvm.c index 956b62a..bee398c 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -511,6 +511,38 @@ static int handle_mmio(CPUState *env) return 0; } +static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head; + +KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque) +{ + KVMRoutingUpdateEntry *e; + + e = qemu_mallocz(sizeof (*e)); + + e->cb = cb; + e->opaque = opaque; + QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, e, entries); + return e; +} + +void kvm_del_routing_updater(KVMRoutingUpdateEntry *e) +{ + QLIST_REMOVE(e, entries); + qemu_free(e); +} + +static void kvm_update_msix_routing(CPUState *env) +{ + unsigned long addr = env->kvm_run->mmio.phys_addr; + KVMRoutingUpdateEntry *e; + + for (e = kvm_routing_update_entry_head.lh_first; e; e = e->entries.le_next) { + if (e->cb(e->opaque, addr) == 0) + return; + } + fprintf(stderr, "unhandled MSI-X routing update addr: 0x%lx\n", addr); +} + int handle_io_window(kvm_context_t kvm) { return 1; @@ -647,6 +679,10 @@ int kvm_run(CPUState *env) kvm_handle_internal_error(env, run); r = 1; break; + case KVM_EXIT_MSIX_ROUTING_UPDATE: + kvm_update_msix_routing(env); + r = 1; + break; default: if (kvm_arch_run(env)) { fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason); diff --git a/qemu-kvm.h b/qemu-kvm.h index 86799e6..21a3274 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -772,6 +772,17 @@ int kvm_tpr_enable_vapic(CPUState *env); unsigned long kvm_get_thread_id(void); int kvm_cpu_is_stopped(CPUState *env); +typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry; +typedef int KVMRoutingUpdateHandler(void *opaque, unsigned long addr); + +struct kvm_routing_update_entry { + KVMRoutingUpdateHandler *cb; + void *opaque; + QLIST_ENTRY (kvm_routing_update_entry) entries; +}; + +KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque); +void kvm_del_routing_updater(KVMRoutingUpdateEntry *e); #endif #endif -- 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html