On 07/25/2012 08:03 PM, Alex Williamson wrote: > +/* > + * Resource setup > + */ > +static void vfio_unmap_bar(VFIODevice *vdev, int nr) > +{ > + VFIOBAR *bar = &vdev->bars[nr]; > + uint64_t size; > + > + if (!memory_region_size(&bar->mem)) { > + return; > + } > + > + size = memory_region_size(&bar->mmap_mem); > + if (size) { > + memory_region_del_subregion(&bar->mem, &bar->mmap_mem); > + munmap(bar->mmap, size); > + } > + > + if (vdev->msix && vdev->msix->table_bar == nr) { > + size = memory_region_size(&vdev->msix->mmap_mem); > + if (size) { > + memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); > + munmap(vdev->msix->mmap, size); > + } > + } Are the three size checks needed? Everything should work without them from the memory core point of view. > + > + memory_region_destroy(&bar->mem); > +} > + > +static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem, > + void **map, size_t size, off_t offset, > + const char *name) > +{ > + *map = mmap(NULL, size, PROT_READ | PROT_WRITE, > + MAP_SHARED, bar->fd, bar->fd_offset + offset); > + if (*map == MAP_FAILED) { > + *map = NULL; > + return -1; > + } > + > + memory_region_init_ram_ptr(submem, name, size, *map); > + memory_region_add_subregion(mem, offset, submem); > + > + return 0; > +} > + > +static void vfio_map_bar(VFIODevice *vdev, int nr, uint8_t type) > +{ > + VFIOBAR *bar = &vdev->bars[nr]; > + unsigned size = bar->size; > + char name[64]; > + > + sprintf(name, "VFIO %04x:%02x:%02x.%x BAR %d", vdev->host.domain, > + vdev->host.bus, vdev->host.slot, vdev->host.function, nr); > + > + /* A "slow" read/write mapping underlies all BARs */ > + memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size); > + pci_register_bar(&vdev->pdev, nr, type, &bar->mem); So far all container BARs have been pure containers, without RAM or I/O callbacks. It should all work, but this sets precedent and requires it to work. I guess there's no problem supporting it though. > + > + if (type & PCI_BASE_ADDRESS_SPACE_IO) { > + return; /* IO space is only slow, don't expect high perf here */ > + } What about non-x86 where IO is actually memory? I think you can drop this and let the address space filtering in the listener drop it if it turns out to be in IO space. > + > + if (size & ~TARGET_PAGE_MASK) { > + error_report("%s is too small to mmap, this may affect performance.\n", > + name); > + return; > + } We can work a little harder and align the host space offset with the guest space offset, and map it in. > + > + /* > + * We can't mmap areas overlapping the MSIX vector table, so we > + * potentially insert a direct-mapped subregion before and after it. > + */ This splitting is what the memory core really enjoys. You can just place the MSIX page over the RAM page and let it do the cut-n-paste. > + if (vdev->msix && vdev->msix->table_bar == nr) { > + size = vdev->msix->table_offset & TARGET_PAGE_MASK; > + } > + > + if (size) { > + strcat(name, " mmap"); > + if (vfio_mmap_bar(bar, &bar->mem, &bar->mmap_mem, &bar->mmap, > + size, 0, name)) { > + error_report("%s Failed. Performance may be slow\n", name); > + } > + } > + > + if (vdev->msix && vdev->msix->table_bar == nr) { > + unsigned start; > + > + start = TARGET_PAGE_ALIGN(vdev->msix->table_offset + > + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); > + > + if (start < bar->size) { > + size = bar->size - start; > + strcat(name, " msix-hi"); > + /* MSIXInfo contains another MemoryRegion for this mapping */ > + if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem, > + &vdev->msix->mmap, size, start, name)) { > + error_report("%s Failed. Performance may be slow\n", name); > + } > + } > + } > + > + return; > +} > + > + > +static int __vfio_get_device(VFIOGroup *group, > + const char *name, VFIODevice *vdev) __foo is a reserved symbol. > +{ > + int ret; > + > + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); > + if (ret < 0) { > + error_report("vfio: error getting device %s from group %d: %s", > + name, group->groupid, strerror(errno)); > + error_report("Verify all devices in group %d " > + "are bound to vfio-pci or pci-stub and not already in use", > + group->groupid); > + return -1; > + } > + > + vdev->group = group; > + QLIST_INSERT_HEAD(&group->device_list, vdev, next); > + > + vdev->fd = ret; > + > + return 0; > +} > + > + > +static Property vfio_pci_dev_properties[] = { > + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host), > + //TODO - support passed fds... is this necessary? Yes. > + //DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name), > + //DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > + > + > +typedef struct MSIVector { > + EventNotifier interrupt; /* eventfd triggered on interrupt */ > + struct VFIODevice *vdev; /* back pointer to device */ > + int vector; /* the vector number for this element */ > + int virq; /* KVM irqchip route for Qemu bypass */ This calls for an abstraction (don't we have a cache where we look those up?) > + bool use; > +} MSIVector; > + > + > +typedef struct VFIOContainer { > + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ > + struct { > + /* enable abstraction to support various iommu backends */ > + union { > + MemoryListener listener; /* Used by type1 iommu */ > + }; The usual was is to have a Type1VFIOContainer deriving from VFIOContainer and adding a MemoryListener. > + void (*release)(struct VFIOContainer *); > + } iommu_data; > + QLIST_HEAD(, VFIOGroup) group_list; > + QLIST_ENTRY(VFIOContainer) next; > +} VFIOContainer; > + > +#endif /* __VFIO_H__ */ > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index 5a9d4e3..bd1a76c 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h Separate patch when leaving RFC mode. > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h > new file mode 100644 > index 0000000..0a4f180 > --- /dev/null > +++ b/linux-headers/linux/vfio.h > @@ -0,0 +1,445 @@ > +/* > + * VFIO API definition > + * > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > + * Author: Alex Williamson <alex.williamson@xxxxxxxxxx> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + */ > +#ifndef VFIO_H > +#define VFIO_H > + > +#include <linux/types.h> > +#include <linux/ioctl.h> > + > +#define VFIO_API_VERSION 0 > + > +#ifdef __KERNEL__ /* Internal VFIO-core/bus driver API */ Use the exported file, that gets rid of the __KERNEL__ bits. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html