Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
---
 hw/device-assignment.c |  106 +++++++++++++++++++++++++++++++++++++++++------
 hw/device-assignment.h |    3 +
 qemu-kvm.c             |   46 +++++++++++++++++++++
 qemu-kvm.h             |   19 +++++++++
 4 files changed, 160 insertions(+), 14 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 5c162c4..09e3b99 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -71,6 +71,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
 static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
                                                     uint32_t address, int len);
 
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -274,6 +279,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     int ret = 0;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+    struct kvm_msix_mmio_user msix_mmio;
+#endif
 
     DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
           e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -292,6 +301,23 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 
             cpu_register_physical_memory(e_phys + offset,
                     TARGET_PAGE_SIZE, r_dev->mmio_index);
+#ifdef KVM_CAP_MSIX_MMIO
+            if (cap_mask) {
+                r_dev->guest_msix_table_addr = e_phys + offset;
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+                    KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.base_va = (unsigned long)r_dev->msix_table_page;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                msix_mmio.flags = 0;
+                ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+                if (ret)
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+            }
+#endif
         }
     }
 
@@ -854,11 +880,6 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
-    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
 static void assign_failed_examine(AssignedDevice *dev)
 {
     char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1268,6 +1289,9 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
     return r;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque,
+        struct kvm_msix_routing_data *data);
+
 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
@@ -1494,7 +1518,9 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
-        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->msix_table_addr = pci_region[bar_nr].base_addr +
+            msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 
@@ -1678,11 +1704,10 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
             (8 * (addr & 3))) & 0xffff;
 }
 
-static void msix_mmio_writel(void *opaque,
-                             target_phys_addr_t addr, uint32_t val)
+static void assigned_dev_update_routing(void *opaque,
+        struct kvm_msix_routing_data *data)
 {
     AssignedDevice *adev = opaque;
-    unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
     int ctrl_word, index;
     struct kvm_irq_routing_entry new_entry = {};
@@ -1691,11 +1716,7 @@ static void msix_mmio_writel(void *opaque,
     struct PCIDevice *pci_dev = &adev->dev;
     uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
-    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
-          addr, val);
-    memcpy((void *)((char *)page + offset), &val, 4);
-
-    index = offset / 16;
+    index = data->entry_idx;
 
     /* Check if mask bit is being accessed */
     memcpy(&msg_addr, (char *)page + index * 16, 4);
@@ -1770,6 +1791,49 @@ static void msix_mmio_writel(void *opaque,
     adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque,
+        struct kvm_msix_routing_data *data)
+{
+    AssignedDevice *adev = opaque;
+
+    if (data->type == KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV &&
+            data->dev_id == calc_assigned_dev_id(adev->h_segnr,
+                adev->h_busnr, adev->h_devfn)) {
+        assigned_dev_update_routing(opaque, data);
+        return 0;
+    }
+    return -EINVAL;
+}
+
+static void msix_mmio_writel(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    AssignedDevice *adev = opaque;
+    void *page = adev->msix_table_page;
+    unsigned int offset = addr & 0xfff;
+    struct kvm_msix_routing_data data;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+#else
+    int cap_mask = 0;
+#endif
+
+    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
+          addr, val);
+    if (!cap_mask) {
+        memcpy((void *)((char *)page + offset), &val, 4);
+    } else {
+        fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n");
+    }
+
+    data.dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
+                                       adev->h_devfn);
+    data.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
+    data.entry_idx = offset / 16;
+    data.flags = 0;
+    assigned_dev_update_routing(opaque, &data);
+}
+
 static void msix_mmio_writew(void *opaque,
                              target_phys_addr_t addr, uint32_t val)
 {
@@ -1811,7 +1875,17 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
                         DEVICE_NATIVE_ENDIAN);
     dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
                                             sizeof *dev->dev.msix_entry_used);
+    dev->routing_updater_entry =
+        kvm_add_routing_updater(assigned_dev_update_routing_handler, dev);
+    if (!dev->routing_updater_entry) {
+        perror("kvm_add_routing_updater");
+        goto out;
+    }
     return 0;
+out:
+    free(dev->dev.msix_entry_used);
+    munmap(dev->msix_table_page, 0x1000);
+    return -EFAULT;
 }
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
@@ -1827,6 +1901,10 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    if (dev->routing_updater_entry) {
+        kvm_del_routing_updater(dev->routing_updater_entry);
+        dev->routing_updater_entry = NULL;
+    }
     free(dev->dev.msix_entry_used);
     dev->dev.msix_entry_used = NULL;
 }
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index d92606e..1716738 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -32,6 +32,7 @@
 #include "qemu-common.h"
 #include "qemu-queue.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* From include/linux/pci.h in the kernel sources */
 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -108,10 +109,12 @@ typedef struct AssignedDevice {
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
+    target_phys_addr_t guest_msix_table_addr;
     int mmio_index;
     int need_emulate_cmd;
     char *configfd_name;
     int32_t bootindex;
+    KVMRoutingUpdateEntry *routing_updater_entry;
     QLIST_ENTRY(AssignedDevice) next;
 } AssignedDevice;
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index d282c95..e4d100f 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -511,6 +511,46 @@ static int handle_mmio(CPUState *env)
     return 0;
 }
 
+static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head;
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque)
+{
+    KVMRoutingUpdateEntry *e;
+
+    e = qemu_mallocz(sizeof (*e));
+
+    e->cb = cb;
+    e->opaque = opaque;
+    QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, e, entries);
+    return e;
+}
+
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e)
+{
+    QLIST_REMOVE(e, entries);
+    qemu_free(e);
+}
+
+#ifdef KVM_CAP_MSIX_MMIO
+static void kvm_update_msix_routing(CPUState *env)
+{
+    struct kvm_msix_routing_data data;
+    KVMRoutingUpdateEntry *e;
+
+    data.dev_id = env->kvm_run->msix_routing.dev_id;
+    data.type = env->kvm_run->msix_routing.type;
+    data.entry_idx = env->kvm_run->msix_routing.entry_idx;
+    data.flags = env->kvm_run->msix_routing.flags;
+    for (e = kvm_routing_update_entry_head.lh_first; e; e = e->entries.le_next) {
+        if (e->cb(e->opaque, &data) == 0)
+            return;
+    }
+    fprintf(stderr, "unhandled MSI-X routing update: "
+            "dev 0x%x, type %d, entry 0x%x, flags 0x%lx\n",
+            data.dev_id, data.type, data.entry_idx, data.flags);
+}
+#endif
+
 int handle_io_window(kvm_context_t kvm)
 {
     return 1;
@@ -647,6 +687,12 @@ int kvm_run(CPUState *env)
         kvm_handle_internal_error(env, run);
         r = 1;
         break;
+#ifdef KVM_CAP_MSIX_MMIO
+    case KVM_EXIT_MSIX_ROUTING_UPDATE:
+        kvm_update_msix_routing(env);
+        r = 1;
+        break;
+#endif
     default:
         if (kvm_arch_run(env)) {
             fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 48ff52d..11a62c7 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -773,6 +773,25 @@ int kvm_tpr_enable_vapic(CPUState *env);
 unsigned long kvm_get_thread_id(void);
 int kvm_cpu_is_stopped(CPUState *env);
 
+struct kvm_msix_routing_data {
+    uint32_t dev_id;
+    uint16_t type;
+    uint16_t entry_idx;
+    uint64_t flags;
+};
+
+typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry;
+typedef int KVMRoutingUpdateHandler(void *opaque,
+        struct kvm_msix_routing_data *data);
+
+struct kvm_routing_update_entry {
+    KVMRoutingUpdateHandler *cb;
+    void *opaque;
+    QLIST_ENTRY (kvm_routing_update_entry) entries;
+};
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque);
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e);
 #endif
 
 #endif
--
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html