On Thu, Nov 04, 2010 at 02:18:21PM +0800, Sheng Yang wrote: > This patch emulated MSI-X per vector mask bit on assigned device. > > Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> > --- > hw/device-assignment.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++-- > 1 files changed, 155 insertions(+), 6 deletions(-) > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c > index 8a98876..639aa0b 100644 > --- a/hw/device-assignment.c > +++ b/hw/device-assignment.c > @@ -62,6 +62,11 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); > > static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); > > +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) > +{ > + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; > +} > + > static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, > uint32_t addr, int len, uint32_t *val) > { > @@ -264,6 +269,9 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, > AssignedDevRegion *region = &r_dev->v_addrs[region_num]; > PCIRegion *real_region = &r_dev->real_device.regions[region_num]; > int ret = 0; > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > + struct kvm_assigned_msix_mmio msix_mmio; > +#endif > > DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n", > e_phys, region->u.r_virtbase, type, e_size, region_num); > @@ -282,6 +290,16 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, > > cpu_register_physical_memory(e_phys + offset, > TARGET_PAGE_SIZE, r_dev->mmio_index); > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > + memset(&msix_mmio, 0, sizeof(struct kvm_assigned_msix_mmio)); > + msix_mmio.assigned_dev_id = calc_assigned_dev_id(r_dev->h_segnr, > + r_dev->h_busnr, r_dev->h_devfn); > + msix_mmio.base_addr = e_phys + offset; > + /* We required kernel MSI-X support */ > + ret = kvm_assign_reg_msix_mmio(kvm_context, &msix_mmio); > + if (ret) > + fprintf(stderr, "fail to register in-kernel 
msix_mmio!\n"); > +#endif > } > } > > @@ -824,11 +842,6 @@ static void free_assigned_device(AssignedDevice *dev) > } > } > > -static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) > -{ > - return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; > -} > - > static void assign_failed_examine(AssignedDevice *dev) > { > char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; > @@ -1123,6 +1136,27 @@ static int get_msix_entries_max_nr(AssignedDevice *adev) > return entries_max_nr; > } > > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > +static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry) > +{ > + struct kvm_assigned_msix_entry msix_entry; > + int r; > + > + memset(&msix_entry, 0, sizeof msix_entry); > + msix_entry.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, > + adev->h_busnr, (uint8_t)adev->h_devfn); > + msix_entry.entry = entry; > + msix_entry.flags = KVM_MSIX_FLAG_QUERY_MASK; > + r = kvm_assign_get_msix_entry(kvm_context, &msix_entry); > + if (r) { > + fprintf(stderr, "assigned_dev_msix_entry_masked: " > + "Fail to get mask bit of entry %d\n", entry); > + return 1; This error handling seems pretty useless. Should this be an assert instead? > + } > + return (msix_entry.flags & KVM_MSIX_FLAG_MASK); > +} > +#endif > + > static int get_msix_valid_entries_nr(AssignedDevice *adev, > uint16_t entries_max_nr) > { > @@ -1136,7 +1170,11 @@ static int get_msix_valid_entries_nr(AssignedDevice *adev, > memcpy(&msg_ctrl, va + i * 16 + 12, 4); > memcpy(&msg_data, va + i * 16 + 8, 4); > /* Ignore unused entry even it's unmasked */ > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > + if (assigned_dev_msix_entry_masked(adev, i)) > +#else > if (msg_data == 0) > +#endif So, we are replacing the msg_data == 0 check with a masked check? If yes, why not do this for the non-KVM_CAP_DEVICE_MSIX_MASK case too? 
> continue; > entries_nr ++; > } > @@ -1165,6 +1203,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, > } > > free_dev_irq_entries(adev); > + memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * > + sizeof(*pci_dev->msix_entry_used)); > adev->irq_entries_nr = entries_nr; > adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry)); > if (!adev->entry) { > @@ -1179,7 +1219,11 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, > break; > memcpy(&msg_ctrl, va + i * 16 + 12, 4); > memcpy(&msg_data, va + i * 16 + 8, 4); > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > + if (assigned_dev_msix_entry_masked(adev, i)) > +#else > if (msg_data == 0) > +#endif You can't use ifdef to check that the kernel supports an ioctl. You must check this at runtime. > continue; > > memcpy(&msg_addr, va + i * 16, 4); > @@ -1200,6 +1244,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, > > msix_entry.gsi = adev->entry[entries_nr].gsi; > msix_entry.entry = i; > + pci_dev->msix_entry_used[i] = 1; > r = kvm_assign_set_msix_entry(kvm_context, &msix_entry); > if (r) { > fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r)); > @@ -1243,6 +1288,8 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix) > perror("assigned_dev_update_msix: deassign irq"); > > assigned_dev->irq_requested_type = 0; > + memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * > + sizeof(*pci_dev->msix_entry_used)); > } > > entries_max_nr = get_msix_entries_max_nr(assigned_dev); > @@ -1250,10 +1297,16 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix) > fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0"); > return; > } > + /* > + * Guest may try to enable MSI-X before setting MSI-X entry done, so > + * let's wait until guest unmask the entries. > + */ Well, the guest can also set up any number of entries, enable MSI-X, and then set up more entries. Now what? 
> entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr); > if (entries_nr == 0) { > +#ifndef KVM_CAP_DEVICE_MSIX_MASK > if (enable_msix) > fprintf(stderr, "MSI-X entry number is zero!\n"); And what happens then? > +#endif > return; > } > if (enable_msix) { > @@ -1297,7 +1350,8 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t ad > if (address <= ctrl_pos && address + len > ctrl_pos) { > ctrl_pos--; /* control is word long */ > ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos); > - assigned_dev_update_msix(pci_dev, (*ctrl_word & PCI_MSIX_ENABLE)); > + assigned_dev_update_msix(pci_dev, > + (*ctrl_word & PCI_MSIX_ENABLE) && !(*ctrl_word & PCI_MSIX_MASK)); > } > pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; > } > @@ -1395,10 +1449,101 @@ static void msix_mmio_writel(void *opaque, > AssignedDevice *adev = opaque; > unsigned int offset = addr & 0xfff; > void *page = adev->msix_table_page; > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > + int pos, ctrl_word, index; > + struct kvm_irq_routing_entry new_entry = {}; > + int entry_idx, entries_max_nr, r = 0, i; > + uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr; > +#endif > > DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", > addr, val); > memcpy((void *)((char *)page + offset), &val, 4); > + > +#ifdef KVM_CAP_DEVICE_MSIX_MASK > + index = offset / 16; > + > + /* Check if mask bit is being accessed */ > + memcpy(&msg_addr, (char *)page + index * 16, 4); > + memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4); > + memcpy(&msg_data, (char *)page + index * 16 + 8, 4); > + memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4); > + DEBUG("MSI-X entries index %d: " > + "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n", > + index, msg_addr, msg_upper_addr, msg_data, msg_ctrl); > + > + if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI) > + pos = adev->dev.cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH; > + else > + pos = adev->dev.cap.start; > + > + 
ctrl_word = *(uint16_t *)(adev->dev.config + pos + 2); > + > + if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK))) > + return; > + > + if (!assigned_dev_msix_entry_masked(adev, index)) { > + if (!adev->dev.msix_entry_used[index]) { > + DEBUG("Try to modify unenabled MSI-X entry %d's mask. " > + "Reenable MSI-X.\n", > + index); > + assigned_dev_update_msix(&adev->dev, 1); > + } > + return; > + } > + > + if (!adev->dev.msix_entry_used[index]) > + return; > + > + /* > + * We're here only because guest want to modify MSI data/addr, and > + * kernel would filter those writing with mask bit unset. > + */ > + entries_max_nr = get_msix_entries_max_nr(adev); > + > + /* > + * Find the index of routing entry, it can be different from 'index' if > + * empty entry existed in between > + */ > + entry_idx = -1; > + for (i = 0; i <= index; i++) { > + if (adev->dev.msix_entry_used[i]) > + entry_idx ++; > + } > + if (entry_idx >= entries_max_nr || entry_idx == -1) { > + fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n", > + entry_idx); > + return; > + } > + > + if (!assigned_dev_msix_entry_masked(adev, index)) { > + fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n"); > + return; > + } > + > + new_entry.gsi = adev->entry[entry_idx].gsi; > + new_entry.type = KVM_IRQ_ROUTING_MSI; > + new_entry.flags = 0; > + new_entry.u.msi.address_lo = msg_addr; > + new_entry.u.msi.address_hi = msg_upper_addr; > + new_entry.u.msi.data = msg_data; > + if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi, > + sizeof new_entry.u.msi)) { > + r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry); > + if (r) { > + perror("msix_mmio_writel: kvm_update_routing_entry failed\n"); > + return; > + } > + r = kvm_commit_irq_routes(); > + if (r) { > + perror("msix_mmio_writel: kvm_commit_irq_routes failed\n"); > + return; > + } > + } > + adev->entry[entry_idx].u.msi.address_lo = msg_addr; > + adev->entry[entry_idx].u.msi.address_hi = 
msg_upper_addr; > + adev->entry[entry_idx].u.msi.data = msg_data; > +#endif > } > > static void msix_mmio_writew(void *opaque, > @@ -1436,6 +1581,8 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev) > memset(dev->msix_table_page, 0, 0x1000); > dev->mmio_index = cpu_register_io_memory( > msix_mmio_read, msix_mmio_write, dev); > + dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV * > + sizeof *dev->dev.msix_entry_used); > return 0; > } > > @@ -1452,6 +1599,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) > strerror(errno)); > } > dev->msix_table_page = NULL; > + free(dev->dev.msix_entry_used); > + dev->dev.msix_entry_used = NULL; > } > > static int assigned_initfn(struct PCIDevice *pci_dev) > -- > 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html