On Wed, 2014-04-09 at 16:33 +0100, Eric Auger wrote: > From: Kim Phillips <kim.phillips@xxxxxxxxxx> > > Functions that PCI and platform device support share are moved > into common.c. The common vfio_{get,put}_group() get an additional > argument, a pointer to a vfio_reset_handler(), to pass on to > qemu_register_reset, but only if it exists (the platform device code > currently passes a NULL as its reset_handler). > > For the platform device code, we basically use SysBusDevice > instead of PCIDevice. Since realize() returns void, unlike > PCIDevice's initfn, error codes are moved into the > error message text with %m. > > Currently only MMIO access is supported. > > The perceived path for future QEMU development is: > > - add support for interrupts > - verify and test platform dev unmap path > - test existing PCI path for any regressions > - add support for creating platform devices on the qemu command line > - currently device address specification is hardcoded for test > development on Calxeda Midway's fff51000.ethernet device > - reset is not supported and registration of reset functions is > bypassed for platform devices. 
> - there is no standard means of resetting a platform device, > unsure if it suffices to be handled at device--VFIO binding time > > Signed-off-by: Kim Phillips <kim.phillips@xxxxxxxxxx> > > [1] http://www.spinics.net/lists/kvm-arm/msg08195.html > --- > hw/vfio/Makefile.objs | 2 + > hw/vfio/common.c | 486 ++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/pci.c | 480 ++----------------------------------------------- > hw/vfio/platform.c | 381 +++++++++++++++++++++++++++++++++++++++ > hw/vfio/vfio-common.h | 55 ++++++ > 5 files changed, 937 insertions(+), 467 deletions(-) > create mode 100644 hw/vfio/common.c > create mode 100644 hw/vfio/platform.c > create mode 100644 hw/vfio/vfio-common.h > > diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs > index 31c7dab..c5c76fe 100644 > --- a/hw/vfio/Makefile.objs > +++ b/hw/vfio/Makefile.objs > @@ -1,3 +1,5 @@ > ifeq ($(CONFIG_LINUX), y) > +obj-$(CONFIG_SOFTMMU) += common.o > obj-$(CONFIG_PCI) += pci.o > +obj-$(CONFIG_SOFTMMU) += platform.o > endif > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > new file mode 100644 > index 0000000..9d1f723 > --- /dev/null > +++ b/hw/vfio/common.c > @@ -0,0 +1,486 @@ > +/* > + * vfio based device assignment support > + * > + * Copyright Red Hat, Inc. 2012 > + * > + * Authors: > + * Alex Williamson <alex.williamson@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Based on qemu-kvm device-assignment: > + * Adapted for KVM by Qumranet. 
> + * Copyright (c) 2007, Neocleus, Alex Novik (alex@xxxxxxxxxxxx) > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@xxxxxxxxxxxx) > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@xxxxxxxxxxxx) > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@xxxxxxxxxx) > + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@xxxxxxxxxx) > + */ > + > +#include <dirent.h> > +#include <linux/vfio.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <sys/stat.h> > +#include <sys/types.h> > +#include <unistd.h> > + > +#include "config.h" > +#include "exec/address-spaces.h" > +#include "exec/memory.h" > +#include "hw/pci/msi.h" > +#include "hw/pci/msix.h" > +#include "hw/pci/pci.h" I expect these pci includes aren't really needed > +#include "qemu-common.h" > +#include "qemu/error-report.h" > +#include "qemu/event_notifier.h" > +#include "qemu/queue.h" > +#include "qemu/range.h" > +#include "sysemu/kvm.h" > +#include "sysemu/sysemu.h" > + > +#include "vfio-common.h" > + > +#define DEBUG_VFIO > +#ifdef DEBUG_VFIO > +#define DPRINTF(fmt, ...) \ > + do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif DEBUG_VFIO should probably be in vfio-common.h > + > +static QLIST_HEAD(, VFIOContainer) > + container_list = QLIST_HEAD_INITIALIZER(container_list); > + > +QLIST_HEAD(, VFIOGroup) > + group_list = QLIST_HEAD_INITIALIZER(group_list); > + > + > +struct VFIODevice; I don't see where this is needed. > + > +#ifdef CONFIG_KVM > +/* > + * We have a single VFIO pseudo device per KVM VM. Once created it lives > + * for the life of the VM. Closing the file descriptor only drops our > + * reference to it and the device's reference to kvm. Therefore once > + * initialized, this file descriptor is only released on QEMU exit and > + * we'll re-use it should another vfio device be attached before then. 
> + */ > +static int vfio_kvm_device_fd = -1; > +#endif > + > +/* > + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 > + */ > +static int vfio_dma_unmap(VFIOContainer *container, > + hwaddr iova, ram_addr_t size) > +{ > + struct vfio_iommu_type1_dma_unmap unmap = { > + .argsz = sizeof(unmap), > + .flags = 0, > + .iova = iova, > + .size = size, > + }; > + > + if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { > + DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno); > + return -errno; > + } > + > + return 0; > +} > + > +static int vfio_dma_map(VFIOContainer *container, hwaddr iova, > + ram_addr_t size, void *vaddr, bool readonly) > +{ > + struct vfio_iommu_type1_dma_map map = { > + .argsz = sizeof(map), > + .flags = VFIO_DMA_MAP_FLAG_READ, > + .vaddr = (__u64)(uintptr_t)vaddr, > + .iova = iova, > + .size = size, > + }; > + > + if (!readonly) { > + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; > + } > + > + /* > + * Try the mapping, if it fails with EBUSY, unmap the region and try > + * again. This shouldn't be necessary, but we sometimes see it in > + * the the VGA ROM space. > + */ > + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || > + (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && > + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { > + return 0; > + } > + > + DPRINTF("VFIO_MAP_DMA: %d\n", -errno); > + return -errno; > +} > + > +static bool vfio_listener_skipped_section(MemoryRegionSection *section) > +{ > + return !memory_region_is_ram(section->mr) || > + /* > + * Sizing an enabled 64-bit BAR can cause spurious mappings to > + * addresses in the upper part of the 64-bit address space. These > + * are never accessed by the CPU and beyond the address width of > + * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. 
> + */ > + section->offset_within_address_space & (1ULL << 63); > +} > + > +static void vfio_listener_region_add(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.type1.listener); > + hwaddr iova, end; > + void *vaddr; > + int ret; > + > + assert(!memory_region_is_iommu(section->mr)); > + > + if (vfio_listener_skipped_section(section)) { > + DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", > + section->offset_within_address_space, > + section->offset_within_address_space + > + int128_get64(int128_sub(section->size, int128_one()))); > + return; > + } > + > + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + error_report("%s received unaligned region", __func__); > + return; > + } > + > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + end = (section->offset_within_address_space + int128_get64(section->size)) & > + TARGET_PAGE_MASK; > + > + if (iova >= end) { > + return; > + } > + > + vaddr = memory_region_get_ram_ptr(section->mr) + > + section->offset_within_region + > + (iova - section->offset_within_address_space); > + > + DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", > + iova, end - 1, vaddr); > + > + memory_region_ref(section->mr); > + ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); > + if (ret) { > + error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " > + "0x%"HWADDR_PRIx", %p) = %d (%m)", > + container, iova, end - iova, vaddr, ret); > + > + /* > + * On the initfn path, store the first error in the container so we > + * can gracefully fail. Runtime, there's not much we can do other > + * than throw a hardware error. 
> + */ > + if (!container->iommu_data.type1.initialized) { > + if (!container->iommu_data.type1.error) { > + container->iommu_data.type1.error = ret; > + } > + } else { > + hw_error("vfio: DMA mapping failed, unable to continue"); > + } > + } > +} > + > +static void vfio_listener_region_del(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + VFIOContainer *container = container_of(listener, VFIOContainer, > + iommu_data.type1.listener); > + hwaddr iova, end; > + int ret; > + > + if (vfio_listener_skipped_section(section)) { > + DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n", > + section->offset_within_address_space, > + section->offset_within_address_space + > + int128_get64(int128_sub(section->size, int128_one()))); > + return; > + } > + > + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > + (section->offset_within_region & ~TARGET_PAGE_MASK))) { > + error_report("%s received unaligned region", __func__); > + return; > + } > + > + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > + end = (section->offset_within_address_space + int128_get64(section->size)) & > + TARGET_PAGE_MASK; > + > + if (iova >= end) { > + return; > + } > + > + DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", > + iova, end - 1); > + > + ret = vfio_dma_unmap(container, iova, end - iova); > + memory_region_unref(section->mr); > + if (ret) { > + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " > + "0x%"HWADDR_PRIx") = %d (%m)", > + container, iova, end - iova, ret); > + } > +} > + > +static MemoryListener vfio_memory_listener = { > + .region_add = vfio_listener_region_add, > + .region_del = vfio_listener_region_del, > +}; > + > +static void vfio_listener_release(VFIOContainer *container) > +{ > + memory_listener_unregister(&container->iommu_data.type1.listener); > +} > + > +static void vfio_kvm_device_add_group(VFIOGroup *group) > +{ > +#ifdef CONFIG_KVM > + struct kvm_device_attr attr = { > + .group = 
KVM_DEV_VFIO_GROUP, > + .attr = KVM_DEV_VFIO_GROUP_ADD, > + .addr = (uint64_t)(unsigned long)&group->fd, > + }; > + > + if (!kvm_enabled()) { > + return; > + } > + > + if (vfio_kvm_device_fd < 0) { > + struct kvm_create_device cd = { > + .type = KVM_DEV_TYPE_VFIO, > + }; > + > + if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { > + DPRINTF("KVM_CREATE_DEVICE: %m\n"); > + return; > + } > + > + vfio_kvm_device_fd = cd.fd; > + } > + > + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { > + error_report("Failed to add group %d to KVM VFIO device: %m", > + group->groupid); > + } > +#endif > +} > + > +static void vfio_kvm_device_del_group(VFIOGroup *group) > +{ > +#ifdef CONFIG_KVM > + struct kvm_device_attr attr = { > + .group = KVM_DEV_VFIO_GROUP, > + .attr = KVM_DEV_VFIO_GROUP_DEL, > + .addr = (uint64_t)(unsigned long)&group->fd, > + }; > + > + if (vfio_kvm_device_fd < 0) { > + return; > + } > + > + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { > + error_report("Failed to remove group %d from KVM VFIO device: %m", > + group->groupid); > + } > +#endif > +} > + > +static int vfio_connect_container(VFIOGroup *group) > +{ > + VFIOContainer *container; > + int ret, fd; > + > + if (group->container) { > + return 0; > + } > + > + QLIST_FOREACH(container, &container_list, next) { > + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { > + group->container = container; > + QLIST_INSERT_HEAD(&container->group_list, group, container_next); > + return 0; > + } > + } > + > + fd = qemu_open("/dev/vfio/vfio", O_RDWR); > + if (fd < 0) { > + error_report("vfio: failed to open /dev/vfio/vfio: %m"); > + return -errno; > + } > + > + ret = ioctl(fd, VFIO_GET_API_VERSION); > + if (ret != VFIO_API_VERSION) { > + error_report("vfio: supported vfio version: %d, " > + "reported version: %d", VFIO_API_VERSION, ret); > + close(fd); > + return -EINVAL; > + } > + > + container = g_malloc0(sizeof(*container)); > + container->fd = fd; > + > + if 
(ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { > + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); > + if (ret) { > + error_report("vfio: failed to set group container: %m"); > + g_free(container); > + close(fd); > + return -errno; > + } > + > + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); > + if (ret) { > + error_report("vfio: failed to set iommu for container: %m"); > + g_free(container); > + close(fd); > + return -errno; > + } > + > + container->iommu_data.type1.listener = vfio_memory_listener; > + container->iommu_data.release = vfio_listener_release; > + > + memory_listener_register(&container->iommu_data.type1.listener, > + &address_space_memory); > + > + if (container->iommu_data.type1.error) { > + ret = container->iommu_data.type1.error; > + vfio_listener_release(container); > + g_free(container); > + close(fd); > + error_report("vfio: memory listener initialization failed for container"); > + return ret; > + } > + > + container->iommu_data.type1.initialized = true; > + > + } else { > + error_report("vfio: No available IOMMU models"); > + g_free(container); > + close(fd); > + return -EINVAL; > + } > + > + QLIST_INIT(&container->group_list); > + QLIST_INSERT_HEAD(&container_list, container, next); > + > + group->container = container; > + QLIST_INSERT_HEAD(&container->group_list, group, container_next); > + > + return 0; > +} > + > +static void vfio_disconnect_container(VFIOGroup *group) > +{ > + VFIOContainer *container = group->container; > + > + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { > + error_report("vfio: error disconnecting group %d from container", > + group->groupid); > + } > + > + QLIST_REMOVE(group, container_next); > + group->container = NULL; > + > + if (QLIST_EMPTY(&container->group_list)) { > + if (container->iommu_data.release) { > + container->iommu_data.release(container); > + } > + QLIST_REMOVE(container, next); > + DPRINTF("vfio_disconnect_container: close container->fd\n"); > + 
close(container->fd); > + g_free(container); > + } > +} > + > +VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler) > +{ > + VFIOGroup *group; > + char path[32]; > + struct vfio_group_status status = { .argsz = sizeof(status) }; > + > + QLIST_FOREACH(group, &group_list, next) { > + if (group->groupid == groupid) { > + return group; > + } > + } > + > + group = g_malloc0(sizeof(*group)); > + > + snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); > + group->fd = qemu_open(path, O_RDWR); > + if (group->fd < 0) { > + error_report("vfio: error opening %s: %m", path); > + g_free(group); > + return NULL; > + } > + > + if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { > + error_report("vfio: error getting group status: %m"); > + close(group->fd); > + g_free(group); > + return NULL; > + } > + > + if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { > + error_report("vfio: error, group %d is not viable, please ensure " > + "all devices within the iommu_group are bound to their " > + "vfio bus driver.", groupid); > + close(group->fd); > + g_free(group); > + return NULL; > + } > + > + group->groupid = groupid; > + QLIST_INIT(&group->device_list); > + > + if (vfio_connect_container(group)) { > + error_report("vfio: failed to setup container for group %d", groupid); > + close(group->fd); > + g_free(group); > + return NULL; > + } > + > + if (QLIST_EMPTY(&group_list) && reset_handler) { > + qemu_register_reset(reset_handler, NULL); > + } > + > + QLIST_INSERT_HEAD(&group_list, group, next); > + > + vfio_kvm_device_add_group(group); > + > + return group; > +} > + > +void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler) > +{ > + if (!QLIST_EMPTY(&group->device_list)) { > + return; > + } > + > + vfio_kvm_device_del_group(group); > + vfio_disconnect_container(group); > + QLIST_REMOVE(group, next); > + DPRINTF("vfio_put_group: close group->fd\n"); > + close(group->fd); > + g_free(group); > + > + if (QLIST_EMPTY(&group_list) && reset_handler) { > 
+ qemu_unregister_reset(reset_handler, NULL); > + } The reset_handler stuff needs work. We can theoretically support both PCI and platform devices simultaneously, but this would only allow one of them to register a reset handler and one of them to unregister it (and those may not be the same one). > +} > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 9cf5b84..9e70d68 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1,5 +1,5 @@ > /* > - * vfio based device assignment support > + * vfio based device assignment support - PCI devices > * > * Copyright Red Hat, Inc. 2012 > * > @@ -40,6 +40,8 @@ > #include "sysemu/kvm.h" > #include "sysemu/sysemu.h" There are probably some includes that could be cleaned out with the split. > +#include "vfio-common.h" > + > /* #define DEBUG_VFIO */ > #ifdef DEBUG_VFIO > #define DPRINTF(fmt, ...) \ DEBUG_VFIO is duplicated here... move to vfio-common.h > @@ -55,6 +57,8 @@ > #define VFIO_ALLOW_KVM_MSI 1 > #define VFIO_ALLOW_KVM_MSIX 1 > > +extern QLIST_HEAD(, VFIOGroup) group_list; Declare this in vfio-common.h instead? 
> + > struct VFIODevice; > > typedef struct VFIOQuirk { > @@ -135,25 +139,6 @@ enum { > > struct VFIOGroup; > > -typedef struct VFIOType1 { > - MemoryListener listener; > - int error; > - bool initialized; > -} VFIOType1; > - > -typedef struct VFIOContainer { > - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ > - struct { > - /* enable abstraction to support various iommu backends */ > - union { > - VFIOType1 type1; > - }; > - void (*release)(struct VFIOContainer *); > - } iommu_data; > - QLIST_HEAD(, VFIOGroup) group_list; > - QLIST_ENTRY(VFIOContainer) next; > -} VFIOContainer; > - > /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ > typedef struct VFIOMSIXInfo { > uint8_t table_bar; > @@ -200,15 +185,6 @@ typedef struct VFIODevice { > bool rom_read_failed; > } VFIODevice; > > -typedef struct VFIOGroup { > - int fd; > - int groupid; > - VFIOContainer *container; > - QLIST_HEAD(, VFIODevice) device_list; > - QLIST_ENTRY(VFIOGroup) next; > - QLIST_ENTRY(VFIOGroup) container_next; > -} VFIOGroup; > - > typedef struct VFIORomBlacklistEntry { > uint16_t vendor_id; > uint16_t device_id; > @@ -234,23 +210,6 @@ static const VFIORomBlacklistEntry romblacklist[] = { > > #define MSIX_CAP_LENGTH 12 > > -static QLIST_HEAD(, VFIOContainer) > - container_list = QLIST_HEAD_INITIALIZER(container_list); > - > -static QLIST_HEAD(, VFIOGroup) > - group_list = QLIST_HEAD_INITIALIZER(group_list); > - > -#ifdef CONFIG_KVM > -/* > - * We have a single VFIO pseudo device per KVM VM. Once created it lives > - * for the life of the VM. Closing the file descriptor only drops our > - * reference to it and the device's reference to kvm. Therefore once > - * initialized, this file descriptor is only released on QEMU exit and > - * we'll re-use it should another vfio device be attached before then. 
> - */ > -static int vfio_kvm_device_fd = -1; > -#endif > - > static void vfio_disable_interrupts(VFIODevice *vdev); > static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); > static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, > @@ -2180,183 +2139,6 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, > } > > /* > - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 > - */ > -static int vfio_dma_unmap(VFIOContainer *container, > - hwaddr iova, ram_addr_t size) > -{ > - struct vfio_iommu_type1_dma_unmap unmap = { > - .argsz = sizeof(unmap), > - .flags = 0, > - .iova = iova, > - .size = size, > - }; > - > - if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { > - DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno); > - return -errno; > - } > - > - return 0; > -} > - > -static int vfio_dma_map(VFIOContainer *container, hwaddr iova, > - ram_addr_t size, void *vaddr, bool readonly) > -{ > - struct vfio_iommu_type1_dma_map map = { > - .argsz = sizeof(map), > - .flags = VFIO_DMA_MAP_FLAG_READ, > - .vaddr = (__u64)(uintptr_t)vaddr, > - .iova = iova, > - .size = size, > - }; > - > - if (!readonly) { > - map.flags |= VFIO_DMA_MAP_FLAG_WRITE; > - } > - > - /* > - * Try the mapping, if it fails with EBUSY, unmap the region and try > - * again. This shouldn't be necessary, but we sometimes see it in > - * the the VGA ROM space. > - */ > - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || > - (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && > - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { > - return 0; > - } > - > - DPRINTF("VFIO_MAP_DMA: %d\n", -errno); > - return -errno; > -} > - > -static bool vfio_listener_skipped_section(MemoryRegionSection *section) > -{ > - return !memory_region_is_ram(section->mr) || > - /* > - * Sizing an enabled 64-bit BAR can cause spurious mappings to > - * addresses in the upper part of the 64-bit address space. 
These > - * are never accessed by the CPU and beyond the address width of > - * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. > - */ > - section->offset_within_address_space & (1ULL << 63); > -} > - > -static void vfio_listener_region_add(MemoryListener *listener, > - MemoryRegionSection *section) > -{ > - VFIOContainer *container = container_of(listener, VFIOContainer, > - iommu_data.type1.listener); > - hwaddr iova, end; > - void *vaddr; > - int ret; > - > - assert(!memory_region_is_iommu(section->mr)); > - > - if (vfio_listener_skipped_section(section)) { > - DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", > - section->offset_within_address_space, > - section->offset_within_address_space + > - int128_get64(int128_sub(section->size, int128_one()))); > - return; > - } > - > - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > - error_report("%s received unaligned region", __func__); > - return; > - } > - > - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > - end = (section->offset_within_address_space + int128_get64(section->size)) & > - TARGET_PAGE_MASK; > - > - if (iova >= end) { > - return; > - } > - > - vaddr = memory_region_get_ram_ptr(section->mr) + > - section->offset_within_region + > - (iova - section->offset_within_address_space); > - > - DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", > - iova, end - 1, vaddr); > - > - memory_region_ref(section->mr); > - ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); > - if (ret) { > - error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " > - "0x%"HWADDR_PRIx", %p) = %d (%m)", > - container, iova, end - iova, vaddr, ret); > - > - /* > - * On the initfn path, store the first error in the container so we > - * can gracefully fail. Runtime, there's not much we can do other > - * than throw a hardware error. 
> - */ > - if (!container->iommu_data.type1.initialized) { > - if (!container->iommu_data.type1.error) { > - container->iommu_data.type1.error = ret; > - } > - } else { > - hw_error("vfio: DMA mapping failed, unable to continue"); > - } > - } > -} > - > -static void vfio_listener_region_del(MemoryListener *listener, > - MemoryRegionSection *section) > -{ > - VFIOContainer *container = container_of(listener, VFIOContainer, > - iommu_data.type1.listener); > - hwaddr iova, end; > - int ret; > - > - if (vfio_listener_skipped_section(section)) { > - DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n", > - section->offset_within_address_space, > - section->offset_within_address_space + > - int128_get64(int128_sub(section->size, int128_one()))); > - return; > - } > - > - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != > - (section->offset_within_region & ~TARGET_PAGE_MASK))) { > - error_report("%s received unaligned region", __func__); > - return; > - } > - > - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); > - end = (section->offset_within_address_space + int128_get64(section->size)) & > - TARGET_PAGE_MASK; > - > - if (iova >= end) { > - return; > - } > - > - DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", > - iova, end - 1); > - > - ret = vfio_dma_unmap(container, iova, end - iova); > - memory_region_unref(section->mr); > - if (ret) { > - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " > - "0x%"HWADDR_PRIx") = %d (%m)", > - container, iova, end - iova, ret); > - } > -} > - > -static MemoryListener vfio_memory_listener = { > - .region_add = vfio_listener_region_add, > - .region_del = vfio_listener_region_del, > -}; > - > -static void vfio_listener_release(VFIOContainer *container) > -{ > - memory_listener_unregister(&container->iommu_data.type1.listener); > -} > - > -/* > * Interrupt setup > */ > static void vfio_disable_interrupts(VFIODevice *vdev) > @@ -3221,244 +3003,8 @@ static void 
vfio_pci_reset_handler(void *opaque) > } > } > > -static void vfio_kvm_device_add_group(VFIOGroup *group) > -{ > -#ifdef CONFIG_KVM > - struct kvm_device_attr attr = { > - .group = KVM_DEV_VFIO_GROUP, > - .attr = KVM_DEV_VFIO_GROUP_ADD, > - .addr = (uint64_t)(unsigned long)&group->fd, > - }; > - > - if (!kvm_enabled()) { > - return; > - } > - > - if (vfio_kvm_device_fd < 0) { > - struct kvm_create_device cd = { > - .type = KVM_DEV_TYPE_VFIO, > - }; > - > - if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { > - DPRINTF("KVM_CREATE_DEVICE: %m\n"); > - return; > - } > - > - vfio_kvm_device_fd = cd.fd; > - } > - > - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { > - error_report("Failed to add group %d to KVM VFIO device: %m", > - group->groupid); > - } > -#endif > -} > - > -static void vfio_kvm_device_del_group(VFIOGroup *group) > -{ > -#ifdef CONFIG_KVM > - struct kvm_device_attr attr = { > - .group = KVM_DEV_VFIO_GROUP, > - .attr = KVM_DEV_VFIO_GROUP_DEL, > - .addr = (uint64_t)(unsigned long)&group->fd, > - }; > - > - if (vfio_kvm_device_fd < 0) { > - return; > - } > - > - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { > - error_report("Failed to remove group %d from KVM VFIO device: %m", > - group->groupid); > - } > -#endif > -} > - > -static int vfio_connect_container(VFIOGroup *group) > -{ > - VFIOContainer *container; > - int ret, fd; > - > - if (group->container) { > - return 0; > - } > - > - QLIST_FOREACH(container, &container_list, next) { > - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { > - group->container = container; > - QLIST_INSERT_HEAD(&container->group_list, group, container_next); > - return 0; > - } > - } > - > - fd = qemu_open("/dev/vfio/vfio", O_RDWR); > - if (fd < 0) { > - error_report("vfio: failed to open /dev/vfio/vfio: %m"); > - return -errno; > - } > - > - ret = ioctl(fd, VFIO_GET_API_VERSION); > - if (ret != VFIO_API_VERSION) { > - error_report("vfio: supported vfio version: %d, " > - 
"reported version: %d", VFIO_API_VERSION, ret); > - close(fd); > - return -EINVAL; > - } > - > - container = g_malloc0(sizeof(*container)); > - container->fd = fd; > - > - if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { > - ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); > - if (ret) { > - error_report("vfio: failed to set group container: %m"); > - g_free(container); > - close(fd); > - return -errno; > - } > - > - ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); > - if (ret) { > - error_report("vfio: failed to set iommu for container: %m"); > - g_free(container); > - close(fd); > - return -errno; > - } > - > - container->iommu_data.type1.listener = vfio_memory_listener; > - container->iommu_data.release = vfio_listener_release; > - > - memory_listener_register(&container->iommu_data.type1.listener, > - &address_space_memory); > - > - if (container->iommu_data.type1.error) { > - ret = container->iommu_data.type1.error; > - vfio_listener_release(container); > - g_free(container); > - close(fd); > - error_report("vfio: memory listener initialization failed for container"); > - return ret; > - } > - > - container->iommu_data.type1.initialized = true; > - > - } else { > - error_report("vfio: No available IOMMU models"); > - g_free(container); > - close(fd); > - return -EINVAL; > - } > - > - QLIST_INIT(&container->group_list); > - QLIST_INSERT_HEAD(&container_list, container, next); > - > - group->container = container; > - QLIST_INSERT_HEAD(&container->group_list, group, container_next); > - > - return 0; > -} > - > -static void vfio_disconnect_container(VFIOGroup *group) > -{ > - VFIOContainer *container = group->container; > - > - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { > - error_report("vfio: error disconnecting group %d from container", > - group->groupid); > - } > - > - QLIST_REMOVE(group, container_next); > - group->container = NULL; > - > - if (QLIST_EMPTY(&container->group_list)) { > - if 
(container->iommu_data.release) { > - container->iommu_data.release(container); > - } > - QLIST_REMOVE(container, next); > - DPRINTF("vfio_disconnect_container: close container->fd\n"); > - close(container->fd); > - g_free(container); > - } > -} > - > -static VFIOGroup *vfio_get_group(int groupid) > -{ > - VFIOGroup *group; > - char path[32]; > - struct vfio_group_status status = { .argsz = sizeof(status) }; > - > - QLIST_FOREACH(group, &group_list, next) { > - if (group->groupid == groupid) { > - return group; > - } > - } > - > - group = g_malloc0(sizeof(*group)); > - > - snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); > - group->fd = qemu_open(path, O_RDWR); > - if (group->fd < 0) { > - error_report("vfio: error opening %s: %m", path); > - g_free(group); > - return NULL; > - } > - > - if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { > - error_report("vfio: error getting group status: %m"); > - close(group->fd); > - g_free(group); > - return NULL; > - } > - > - if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { > - error_report("vfio: error, group %d is not viable, please ensure " > - "all devices within the iommu_group are bound to their " > - "vfio bus driver.", groupid); > - close(group->fd); > - g_free(group); > - return NULL; > - } > - > - group->groupid = groupid; > - QLIST_INIT(&group->device_list); > - > - if (vfio_connect_container(group)) { > - error_report("vfio: failed to setup container for group %d", groupid); > - close(group->fd); > - g_free(group); > - return NULL; > - } > - > - if (QLIST_EMPTY(&group_list)) { > - qemu_register_reset(vfio_pci_reset_handler, NULL); > - } > - > - QLIST_INSERT_HEAD(&group_list, group, next); > - > - vfio_kvm_device_add_group(group); > - > - return group; > -} > - > -static void vfio_put_group(VFIOGroup *group) > -{ > - if (!QLIST_EMPTY(&group->device_list)) { > - return; > - } > - > - vfio_kvm_device_del_group(group); > - vfio_disconnect_container(group); > - QLIST_REMOVE(group, next); > - 
DPRINTF("vfio_put_group: close group->fd\n"); > - close(group->fd); > - g_free(group); > - > - if (QLIST_EMPTY(&group_list)) { > - qemu_unregister_reset(vfio_pci_reset_handler, NULL); > - } > -} > - > -static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) > +static int vfio_get_device(VFIOGroup *group, const char *name, > + struct VFIODevice *vdev) Why? > { > struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; > struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; > @@ -3485,7 +3031,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) > goto error; > } > > - DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name, > + DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name, ??? > dev_info.flags, dev_info.num_regions, dev_info.num_irqs); > > if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) { > @@ -3768,7 +3314,7 @@ static int vfio_initfn(PCIDevice *pdev) > DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain, > vdev->host.bus, vdev->host.slot, vdev->host.function, groupid); > > - group = vfio_get_group(groupid); > + group = vfio_get_group(groupid, vfio_pci_reset_handler); > if (!group) { > error_report("vfio: failed to get group %d", groupid); > return -ENOENT; > @@ -3785,7 +3331,7 @@ static int vfio_initfn(PCIDevice *pdev) > pvdev->host.function == vdev->host.function) { > > error_report("vfio: error: device %s is already attached", path); > - vfio_put_group(group); > + vfio_put_group(group, vfio_pci_reset_handler); > return -EBUSY; > } > } > @@ -3793,7 +3339,7 @@ static int vfio_initfn(PCIDevice *pdev) > ret = vfio_get_device(group, path, vdev); > if (ret) { > error_report("vfio: failed to get device %s", path); > - vfio_put_group(group); > + vfio_put_group(group, vfio_pci_reset_handler); > return ret; > } > > @@ -3879,7 +3425,7 @@ out_teardown: > out_put: > g_free(vdev->emulated_config_bits); > vfio_put_device(vdev); > - vfio_put_group(group); > + 
vfio_put_group(group, vfio_pci_reset_handler); > return ret; > } > > @@ -3899,7 +3445,7 @@ static void vfio_exitfn(PCIDevice *pdev) > g_free(vdev->emulated_config_bits); > g_free(vdev->rom); > vfio_put_device(vdev); > - vfio_put_group(group); > + vfio_put_group(group, vfio_pci_reset_handler); > } > > static void vfio_pci_reset(DeviceState *dev) > diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c > new file mode 100644 > index 0000000..138fb13 > --- /dev/null > +++ b/hw/vfio/platform.c > @@ -0,0 +1,381 @@ > +/* > + * vfio based device assignment support - platform devices > + * > + * Copyright Linaro Limited, 2014 > + * > + * Authors: > + * Kim Phillips <kim.phillips@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Based on vfio based PCI device assignment support: > + * Copyright Red Hat, Inc. 2012 > + */ > + > +#include <dirent.h> > +#include <linux/vfio.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <sys/stat.h> > +#include <sys/types.h> > +#include <unistd.h> > + > +#include "config.h" > +#include "exec/address-spaces.h" > +#include "exec/memory.h" > +#include "qemu-common.h" > +#include "qemu/error-report.h" > +#include "qemu/event_notifier.h" > +#include "qemu/queue.h" > +#include "qemu/range.h" > +#include "sysemu/kvm.h" > +#include "sysemu/sysemu.h" > +#include "hw/qdev-properties.h" > +#include "migration/vmstate.h" > +#include "hw/hw.h" > +#include "hw/sysbus.h" > + > +#include "vfio-common.h" > + > +#define DEBUG_VFIO > +#ifdef DEBUG_VFIO > +#define DPRINTF(fmt, ...) \ > + do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif > + > +/* Extra debugging, trap acceleration paths for more logging */ > +#define VFIO_ALLOW_MMAP 1 Maybe there should be a common debug section in vfio-common.h? 
> + > +#define TYPE_VFIO_PLATFORM "vfio-platform" > + > +typedef struct VFIORegion { > + off_t fd_offset; /* offset of region within device fd */ > + int fd; /* device fd, allows us to pass VFIORegion as opaque data */ > + MemoryRegion mem; /* slow, read/write access */ > + MemoryRegion mmap_mem; /* direct mapped access */ > + void *mmap; > + size_t size; > + uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ > + uint8_t nr; /* cache the region number for debug */ > +} VFIORegion; > + > +typedef struct VFIODevice { > + SysBusDevice sbdev; > + int fd; > + int num_regions; > + VFIORegion *regions; > + QLIST_ENTRY(VFIODevice) next; > + struct VFIOGroup *group; > + char *name; > +} VFIODevice; I suspect we really need: (in vfio-common.h) typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; struct VFIOGroup *group; enum VFIODeviceType; /* maybe some of the other common stuff */ } VFIODevice; (per pci/platform) typedef struct VFIOPlatformDevice { VFIODevice vdev; /* device specific stuff */ } VFIOPlatformDevice; Otherwise I don't see how the device_list on VFIOGroup works. 
> + > +static int vfio_mmap_region(VFIODevice *vdev, VFIORegion *region, > + MemoryRegion *mem, MemoryRegion *submem, > + void **map, size_t size, off_t offset, > + const char *name) > +{ > + int ret = 0; > + > + if (VFIO_ALLOW_MMAP && size && region->flags & VFIO_REGION_INFO_FLAG_MMAP) { > + int prot = 0; > + ret = 0; > + > + if (region->flags & VFIO_REGION_INFO_FLAG_READ) { > + prot |= PROT_READ; > + } > + > + if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { > + prot |= PROT_WRITE; > + } > + > + *map = mmap(NULL, size, prot, MAP_SHARED, > + region->fd, region->fd_offset + offset); > + if (*map == MAP_FAILED) { > + ret = -errno; > + *map = NULL; > + goto error; > + } > + > + memory_region_init_ram_ptr(submem, OBJECT(vdev), name, size, *map); > + } > + > + memory_region_add_subregion(mem, offset, submem); > + > +error: > + return ret; > +} > + > +/* > + * IO Port/MMIO - Beware of the endians, VFIO is always little endian > + */ > +static void vfio_region_write(void *opaque, hwaddr addr, > + uint64_t data, unsigned size) > +{ > + VFIORegion *region = opaque; > + union { > + uint8_t byte; > + uint16_t word; > + uint32_t dword; > + uint64_t qword; > + } buf; > + > + switch (size) { > + case 1: > + buf.byte = data; > + break; > + case 2: > + buf.word = data; > + break; > + case 4: > + buf.dword = data; > + break; > + default: > + hw_error("vfio: unsupported write size, %d bytes\n", size); > + break; > + } > + > + if (pwrite(region->fd, &buf, size, region->fd_offset + addr) != size) { > + error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m", > + __func__, addr, data, size); > + } > + > + DPRINTF("%s(region %d+0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n", > + __func__, region->nr, addr, data, size); > +} > + > +static uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size) > +{ > + VFIORegion *region = opaque; > + union { > + uint8_t byte; > + uint16_t word; > + uint32_t dword; > + uint64_t qword; > + } buf; > + uint64_t data = 0; > + > + if 
(pread(region->fd, &buf, size, region->fd_offset + addr) != size) { > + error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m", > + __func__, addr, size); > + return (uint64_t)-1; > + } > + > + switch (size) { > + case 1: > + data = buf.byte; > + break; > + case 2: > + data = buf.word; > + break; > + case 4: > + data = buf.dword; > + break; > + default: > + hw_error("vfio: unsupported read size, %d bytes\n", size); > + break; > + } > + > + DPRINTF("%s(region %d+0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n", > + __func__, region->nr, addr, size, data); > + > + return data; > +} > + > +static const MemoryRegionOps vfio_region_ops = { > + .read = vfio_region_read, > + .write = vfio_region_write, > + .endianness = DEVICE_NATIVE_ENDIAN, > +}; > + > +static void vfio_map_region(VFIODevice *vdev, int nr) > +{ > + VFIORegion *region = &vdev->regions[nr]; > + unsigned size = region->size; > + char name[64]; > + > + snprintf(name, sizeof(name), "VFIO %s region %d", vdev->name, nr); > + > + /* A "slow" read/write mapping underlies all regions */ > + memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops, > + region, name, size); > + > + strncat(name, " mmap", sizeof(name) - strlen(name) - 1); > + if (vfio_mmap_region(vdev, region, &region->mem, > + &region->mmap_mem, &region->mmap, size, 0, name)) { > + error_report("%s unsupported. 
Performance may be slow", name); > + } > +} > + > +static int vfio_get_device(VFIOGroup *group, const char *name, > + struct VFIODevice *vdev) > +{ > + struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; > + struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; > + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; > + int ret, i; > + > + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); > + if (ret < 0) { > + error_report("vfio: error getting device %s from group %d: %m", > + name, group->groupid); > + error_printf("Verify all devices in group %d are bound to the vfio " > + "platform driver and are not already in use\n", > + group->groupid); > + return ret; > + } > + > + vdev->fd = ret; > + vdev->group = group; > + QLIST_INSERT_HEAD(&group->device_list, vdev, next); > + > + /* Sanity check device */ > + ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info); > + if (ret) { > + error_report("vfio: error getting device info: %m"); > + goto error; > + } > + > + DPRINTF("Device %s flags: %u, regions: %u, irqs: %u\n", name, > + dev_info.flags, dev_info.num_regions, dev_info.num_irqs); > + > + vdev->regions = g_malloc0(sizeof(VFIORegion) * dev_info.num_regions); > + if (!vdev->regions) { > + error_report("vfio: Error allocating space for %d regions", > + dev_info.num_regions); > + ret = -ENOMEM; > + goto error; > + } > + > + vdev->num_regions = dev_info.num_regions; > + > + for (i = 0; i < dev_info.num_regions; i++) { > + reg_info.index = i; > + > + ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info); > + if (ret) { > + error_report("vfio: Error getting region %d info: %m", i); > + goto error; > + } > + > + DPRINTF("Device %s region %d:\n", name, i); > + DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", > + (unsigned long)reg_info.size, (unsigned long)reg_info.offset, > + (unsigned long)reg_info.flags); > + > + vdev->regions[i].flags = reg_info.flags; > + vdev->regions[i].size = reg_info.size; > + 
vdev->regions[i].fd_offset = reg_info.offset; > + vdev->regions[i].fd = vdev->fd; > + vdev->regions[i].nr = i; > + } Perhaps more of this could be in common.c if we had a generic VFIODevice embedded in a VFIOPlatformDevice or VFIOPCIDevice. We could fill in high-level things like number of regions/irqs, fd, name, etc, leaving interpretation and probing of each region/irq to device specific code. > + > +error: > + if (ret) { > + g_free(vdev->regions); > + QLIST_REMOVE(vdev, next); > + vdev->group = NULL; > + close(vdev->fd); > + } > + return ret; > +} > + > +static void vfio_platform_realize(DeviceState *dev, Error **errp) > +{ > + SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); > + VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev); > + VFIOGroup *group; > + char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; > + ssize_t len; > + struct stat st; > + int groupid, i, ret; > + > + /* TODO: pass device name on command line */ > + vdev->name = malloc(PATH_MAX); > + strcpy(vdev->name, "fff51000.ethernet"); > + > + /* Check that the host device exists */ > + snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", vdev->name); > + if (stat(path, &st) < 0) { > + error_report("vfio: error: no such host device: %s", path); > + return; > + } > + > + strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); > + > + len = readlink(path, iommu_group_path, PATH_MAX); > + if (len <= 0) { > + error_report("vfio: error no iommu_group for device"); > + return; > + } > + > + iommu_group_path[len] = 0; > + group_name = basename(iommu_group_path); > + > + if (sscanf(group_name, "%d", &groupid) != 1) { > + error_report("vfio: error reading %s: %m", path); > + return; > + } > + > + DPRINTF("%s(%s) group %d\n", __func__, vdev->name, groupid); > + > + group = vfio_get_group(groupid, NULL); > + if (!group) { > + error_report("vfio: failed to get group %d", groupid); > + return; > + } > + > + snprintf(path, sizeof(path), "%s", vdev->name); > + > + 
QLIST_FOREACH(pvdev, &group->device_list, next) { > + if (strcmp(pvdev->name, vdev->name) == 0) { > + error_report("vfio: error: device %s is already attached", path); > + vfio_put_group(group, NULL); > + return; > + } > + } > + > + ret = vfio_get_device(group, path, vdev); > + if (ret) { > + error_report("vfio: failed to get device %s", path); > + vfio_put_group(group, NULL); > + return; > + } > + > + for (i = 0; i < vdev->num_regions; i++) { > + vfio_map_region(vdev, i); > + sysbus_init_mmio(sbdev, &vdev->regions[i].mem); > + } > +} > + > +static const VMStateDescription vfio_platform_vmstate = { > + .name = TYPE_VFIO_PLATFORM, > + .unmigratable = 1, > +}; > + > +static void vfio_platform_dev_class_init(ObjectClass *klass, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(klass); > + > + dc->realize = vfio_platform_realize; > + dc->vmsd = &vfio_platform_vmstate; > + dc->desc = "VFIO-based platform device assignment"; > + set_bit(DEVICE_CATEGORY_MISC, dc->categories); > +} > + > +static const TypeInfo vfio_platform_dev_info = { > + .name = TYPE_VFIO_PLATFORM, > + .parent = TYPE_SYS_BUS_DEVICE, > + .instance_size = sizeof(VFIODevice), > + .class_init = vfio_platform_dev_class_init, > +}; > + > +static void register_vfio_platform_dev_type(void) > +{ > + type_register_static(&vfio_platform_dev_info); > +} > + > +type_init(register_vfio_platform_dev_type) > diff --git a/hw/vfio/vfio-common.h b/hw/vfio/vfio-common.h > new file mode 100644 > index 0000000..21148ef > --- /dev/null > +++ b/hw/vfio/vfio-common.h > @@ -0,0 +1,55 @@ > +/* > + * common header for vfio based device assignment support > + * > + * Copyright Red Hat, Inc. 2012 > + * > + * Authors: > + * Alex Williamson <alex.williamson@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Based on qemu-kvm device-assignment: > + * Adapted for KVM by Qumranet. 
> + * Copyright (c) 2007, Neocleus, Alex Novik (alex@xxxxxxxxxxxx) > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@xxxxxxxxxxxx) > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@xxxxxxxxxxxx) > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@xxxxxxxxxx) > + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@xxxxxxxxxx) > + */ > + > +struct VFIODevice; > + > +struct VFIOGroup; > + > +typedef struct VFIOType1 { > + MemoryListener listener; > + int error; > + bool initialized; > +} VFIOType1; > + > +typedef struct VFIOContainer { > + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ > + struct { > + /* enable abstraction to support various iommu backends */ > + union { > + VFIOType1 type1; > + }; > + void (*release)(struct VFIOContainer *); > + } iommu_data; > + QLIST_HEAD(, VFIOGroup) group_list; > + QLIST_ENTRY(VFIOContainer) next; > +} VFIOContainer; > + > +typedef struct VFIOGroup { > + int fd; > + int groupid; > + VFIOContainer *container; > + QLIST_HEAD(, VFIODevice) device_list; > + QLIST_ENTRY(VFIOGroup) next; > + QLIST_ENTRY(VFIOGroup) container_next; > +} VFIOGroup; > + > + > +VFIOGroup *vfio_get_group(int groupid, QEMUResetHandler *reset_handler); > +void vfio_put_group(VFIOGroup *group, QEMUResetHandler *reset_handler); _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm