On 08/01/2012 08:18 AM, Alex Williamson wrote: > This adds the core of the QEMU VFIO-based PCI device assignment driver. > To make use of this driver, enable CONFIG_VFIO, CONFIG_VFIO_IOMMU_TYPE1, > and CONFIG_VFIO_PCI in your host Linux kernel config. Load the vfio-pci > module. To assign device 0000:05:00.0 to a guest, do the following: > > for dev in $(ls /sys/bus/pci/devices/0000:05:00.0/iommu_group/devices); do > vendor=$(cat /sys/bus/pci/devices/$dev/vendor) > device=$(cat /sys/bus/pci/devices/$dev/device) > if [ -e /sys/bus/pci/devices/$dev/driver ]; then > echo $dev > /sys/bus/pci/devices/$dev/driver/unbind > fi > echo $vendor $device > /sys/bus/pci/drivers/vfio-pci/new_id > done > > See Documentation/vfio.txt in the Linux kernel tree for further > description of IOMMU groups and VFIO. > > Then launch qemu including the option: > > -device vfio-pci,host=0000:05:00.0 > > Support for legacy PCI interrupts (INTx) is not yet included and will > be added in a future update. Both MSI and MSI-X are supported here. > + > +static void vfio_update_irq(PCIDevice *pdev) > +{ > + VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); > + PCIINTxRoute route; > + > + if (vdev->interrupt != INT_INTx) { > + return; > + } > + > + route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin); > + if (!memcmp(&route, &vdev->intx.route, sizeof(route))) { > + return; /* Nothing changed */ > + } You can't memcmp() structures, the compiler may add uninitialized holes that will miscompare. It's probably harmless here since it's an optimization. Unrelated nit: memcmp() doesn't return a boolean or a count, so !memcmp() is really unintuitive, at least to me. > + > +static int vfio_enable_intx(VFIODevice *vdev) > +{ > + struct vfio_irq_set_fd irq_set_fd = { > + .irq_set = { > + .argsz = sizeof(irq_set_fd), > + .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER, > + .index = VFIO_PCI_INTX_IRQ_INDEX, > + .start = 0, > + .count = 1, > + }, > + }; > + uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); > + > + if (!pin) { > + return 0; > + } > + > + vfio_disable_interrupts(vdev); > + > + vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ > + vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, > + vdev->intx.pin); > + /* TBD - Enable QEMU eoi notifier */ > + > + if (event_notifier_init(&vdev->intx.interrupt, 0)) { > + error_report("vfio: Error: event_notifier_init failed\n"); > + return -1; return -error is better. > + } > + > + irq_set_fd.fd = event_notifier_get_fd(&vdev->intx.interrupt); > + qemu_set_fd_handler(irq_set_fd.fd, vfio_intx_interrupt, NULL, vdev); > + > + if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd)) { > + error_report("vfio: Error: Failed to setup INTx fd: %s\n", > + strerror(errno)); > + return -1; > + } > + > + vfio_enable_intx_kvm(vdev); > + > + vdev->interrupt = INT_INTx; > + > + DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, > + vdev->host.bus, vdev->host.slot, vdev->host.function); > + > + return 0; > +} > + > + > + > +/* XXX This should move to msi.c */ Well? > +static MSIMessage msi_get_msg(PCIDevice *pdev, unsigned int vector) > +{ > + uint16_t flags = pci_get_word(pdev->config + pdev->msi_cap + PCI_MSI_FLAGS); > + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; > + MSIMessage msg; > + > + if (msi64bit) { > + msg.address = pci_get_quad(pdev->config + > + pdev->msi_cap + PCI_MSI_ADDRESS_LO); > + } else { > + msg.address = pci_get_long(pdev->config + > + pdev->msi_cap + PCI_MSI_ADDRESS_LO); > + } > + > + msg.data = pci_get_word(pdev->config + pdev->msi_cap + > + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32)); > + msg.data += vector; > + > + return msg; > +} > + > + > +/* > + * IO Port/MMIO - Beware of the endians, VFIO is always little endian > + */ > +static void vfio_bar_write(void *opaque, target_phys_addr_t addr, > + uint64_t data, unsigned size) > +{ > + VFIOBAR *bar = opaque; > + uint8_t buf[8]; > + > + switch (size) { > + case 1: > + *buf = data & 0xff; > + break; > + case 2: > + *(uint16_t *)buf = cpu_to_le16(data); > + break; > + case 4: > + *(uint32_t *)buf = cpu_to_le32(data); > + break; This works accidentally on machines that require alignment, since there's no requirement from the compiler to align buf. You can use a union to align it. > + default: > + hw_error("vfio: unsupported write size, %d bytes\n", size); > + break; > + } > + > + if (pwrite(bar->fd, buf, size, bar->fd_offset + addr) != size) { > + error_report("%s(,0x%"PRIx64", 0x%"PRIx64", %d) failed: %s\n", > + __func__, addr, data, size, strerror(errno)); > + } > + > + DPRINTF("%s(BAR%d+0x%"PRIx64", 0x%"PRIx64", %d)\n", > + __func__, bar->nr, addr, data, size); > +} > + > + > +static void vfio_listener_region_add(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.listener); > + target_phys_addr_t iova, end; > + void *vaddr; > + int ret; > + > + if (vfio_listener_skipped_section(section)) { > + DPRINTF("vfio: SKIPPING region_add %016lx - %016lx\n", > + section->offset_within_address_space, > + section->offset_within_address_space + section->size - 1); > + return; > + } > + > + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + error_report("%s received unaligned region\n", __func__); Is it really an error? I think you can just add the condition to skipped_section. > + return; > + } > + > + -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html