Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> --- hw/device-assignment.c | 93 +++++++++++++++++++++++++++++++++++++++++------ hw/device-assignment.h | 3 ++ qemu-kvm.c | 40 ++++++++++++++++++++ qemu-kvm.h | 11 ++++++ 4 files changed, 135 insertions(+), 12 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index f81050f..bddee2a 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -70,6 +70,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, uint32_t address, int len); +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) +{ + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; +} + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -272,6 +277,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, AssignedDevRegion *region = &r_dev->v_addrs[region_num]; PCIRegion *real_region = &r_dev->real_device.regions[region_num]; int ret = 0; +#ifdef KVM_CAP_MSIX_MMIO + int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO); + struct kvm_msix_mmio_user msix_mmio; +#endif DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n", e_phys, region->u.r_virtbase, type, e_size, region_num); @@ -290,6 +299,23 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, cpu_register_physical_memory(e_phys + offset, TARGET_PAGE_SIZE, r_dev->mmio_index); +#ifdef KVM_CAP_MSIX_MMIO + if (cap_mask) { + r_dev->guest_msix_table_addr = e_phys + offset; + memset(&msix_mmio, 0, sizeof msix_mmio); + msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr, + r_dev->h_busnr, r_dev->h_devfn); + msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV | + KVM_MSIX_MMIO_TYPE_BASE_TABLE; + msix_mmio.base_addr = e_phys + offset; + msix_mmio.base_va = (unsigned long)r_dev->msix_table_page; + msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr; + msix_mmio.flags = 0; + ret = kvm_register_msix_mmio(kvm_context, &msix_mmio); + if (ret) + fprintf(stderr, "fail to register in-kernel msix_mmio!\n"); + } +#endif } } @@ -852,11 +878,6 @@ static void free_assigned_device(AssignedDevice *dev) } } -static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) -{ - return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; -} - static void assign_failed_examine(AssignedDevice *dev) { char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; @@ -1263,6 +1284,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, return r; } +static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr); + static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) { struct kvm_assigned_irq assigned_irq_data; @@ -1486,7 +1509,9 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE); bar_nr = msix_table_entry & PCI_MSIX_BIR; msix_table_entry &= ~PCI_MSIX_BIR; - dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; + dev->msix_table_addr = pci_region[bar_nr].base_addr + + msix_table_entry; + dev->max_msix_entries_nr = get_msix_entries_max_nr(dev); } #endif @@ -1670,8 +1695,7 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) (8 * (addr & 3))) & 0xffff; } -static void msix_mmio_writel(void *opaque, - target_phys_addr_t addr, uint32_t val) +static void assigned_dev_update_routing(void *opaque, unsigned long addr) { AssignedDevice *adev = opaque; unsigned int offset = addr & 0xfff; @@ -1683,10 +1707,6 @@ static void msix_mmio_writel(void *opaque, struct PCIDevice *pci_dev = &adev->dev; uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", - addr, val); - memcpy((void *)((char *)page + offset), &val, 4); - index = offset / 16; /* Check if mask bit is being accessed */ @@ -1762,6 +1782,41 @@ static void msix_mmio_writel(void *opaque, adev->entry[entry_idx].u.msi.data = msg_data; } +static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr) +{ + AssignedDevice *adev = opaque; + + if (addr >= adev->guest_msix_table_addr && + addr < adev->guest_msix_table_addr + adev->max_msix_entries_nr * 16) { + assigned_dev_update_routing(opaque, addr); + return 0; + } + return -EINVAL; +} + +static void msix_mmio_writel(void *opaque, + target_phys_addr_t addr, uint32_t val) +{ + AssignedDevice *adev = opaque; + void *page = adev->msix_table_page; + unsigned int offset = addr & 0xfff; +#ifdef KVM_CAP_MSIX_MMIO + int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO); +#else + int cap_mask = 0; +#endif + + DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", + addr, val); + if (!cap_mask) { + memcpy((void *)((char *)page + offset), &val, 4); + } else { + fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n"); + } + + assigned_dev_update_routing(opaque, addr); +} + static void msix_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val) { @@ -1802,7 +1857,17 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev) msix_mmio_read, msix_mmio_write, dev); dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV * sizeof *dev->dev.msix_entry_used); + dev->routing_updater_entry = + kvm_add_routing_updater(assigned_dev_update_routing_handler, dev); + if (!dev->routing_updater_entry) { + perror("kvm_add_routing_updater"); + goto out; + } return 0; +out: + free(dev->dev.msix_entry_used); + munmap(dev->msix_table_page, 0x1000); + return -EFAULT; } static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) @@ -1818,6 +1883,10 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) strerror(errno)); } dev->msix_table_page = NULL; + if (dev->routing_updater_entry) { + kvm_del_routing_updater(dev->routing_updater_entry); + dev->routing_updater_entry = NULL; + } free(dev->dev.msix_entry_used); dev->dev.msix_entry_used = NULL; } diff --git a/hw/device-assignment.h b/hw/device-assignment.h index 754e5c0..98ef82f 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -32,6 +32,7 @@ #include "qemu-common.h" #include "qemu-queue.h" #include "pci.h" +#include "qemu-kvm.h" /* From include/linux/pci.h in the kernel sources */ #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) @@ -108,9 +109,11 @@ typedef struct AssignedDevice { struct kvm_irq_routing_entry *entry; void *msix_table_page; target_phys_addr_t msix_table_addr; + target_phys_addr_t guest_msix_table_addr; int mmio_index; int need_emulate_cmd; char *configfd_name; + KVMRoutingUpdateEntry *routing_updater_entry; QLIST_ENTRY(AssignedDevice) next; } AssignedDevice; diff --git a/qemu-kvm.c b/qemu-kvm.c index 956b62a..1ba81c4 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -511,6 +511,40 @@ static int handle_mmio(CPUState *env) return 0; } +static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head; + +KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque) +{ + KVMRoutingUpdateEntry *e; + + e = qemu_mallocz(sizeof (*e)); + + e->cb = cb; + e->opaque = opaque; + QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, e, entries); + return e; +} + +void kvm_del_routing_updater(KVMRoutingUpdateEntry *e) +{ + QLIST_REMOVE(e, entries); + qemu_free(e); +} + +#ifdef KVM_CAP_MSIX_MMIO +static void kvm_update_msix_routing(CPUState *env) +{ + unsigned long addr = env->kvm_run->mmio.phys_addr; + KVMRoutingUpdateEntry *e; + + for (e = kvm_routing_update_entry_head.lh_first; e; e = e->entries.le_next) { + if (e->cb(e->opaque, addr) == 0) + return; + } + fprintf(stderr, "unhandled MSI-X routing update addr: 0x%lx\n", addr); +} +#endif + int handle_io_window(kvm_context_t kvm) { return 1; @@ -647,6 +681,12 @@ int kvm_run(CPUState *env) kvm_handle_internal_error(env, run); r = 1; break; +#ifdef KVM_CAP_MSIX_MMIO + case KVM_EXIT_MSIX_ROUTING_UPDATE: + kvm_update_msix_routing(env); + r = 1; + break; +#endif default: if (kvm_arch_run(env)) { fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason); diff --git a/qemu-kvm.h b/qemu-kvm.h index 86799e6..21a3274 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -772,6 +772,17 @@ int kvm_tpr_enable_vapic(CPUState *env); unsigned long kvm_get_thread_id(void); int kvm_cpu_is_stopped(CPUState *env); +typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry; +typedef int KVMRoutingUpdateHandler(void *opaque, unsigned long addr); + +struct kvm_routing_update_entry { + KVMRoutingUpdateHandler *cb; + void *opaque; + QLIST_ENTRY (kvm_routing_update_entry) entries; +}; + +KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque); +void kvm_del_routing_updater(KVMRoutingUpdateEntry *e); #endif #endif -- 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html