Hi Alex, On 07/02/18 01:26, Alex Williamson wrote: > With vfio ioeventfd support, we can program vfio-pci to perform a > specified BAR write when an eventfd is triggered. This allows the > KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding > userspace handling for these events. On the same micro-benchmark > where the ioeventfd got us to almost 90% of performance versus > disabling the GeForce quirks, this gets us to within 95%. > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > --- > hw/vfio/pci-quirks.c | 42 ++++++++++++++++++++++++++++++++++++------ > 1 file changed, 36 insertions(+), 6 deletions(-) > > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c > index e739efe601b1..35a4d5197e2d 100644 > --- a/hw/vfio/pci-quirks.c > +++ b/hw/vfio/pci-quirks.c > @@ -16,6 +16,7 @@ > #include "qemu/range.h" > #include "qapi/error.h" > #include "qapi/visitor.h" > +#include <sys/ioctl.h> > #include "hw/nvram/fw_cfg.h" > #include "pci.h" > #include "trace.h" > @@ -287,13 +288,27 @@ static VFIOQuirk *vfio_quirk_alloc(int nr_mem) > return quirk; > } > > -static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd) > +static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd) > { > + struct vfio_device_ioeventfd vfio_ioeventfd; > + > QLIST_REMOVE(ioeventfd, next); > + > memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size, > ioeventfd->match_data, ioeventfd->data, > &ioeventfd->e); > + > qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL); > + > + vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd); > + vfio_ioeventfd.flags = ioeventfd->size; > + vfio_ioeventfd.data = ioeventfd->data; > + vfio_ioeventfd.offset = ioeventfd->region->fd_offset + > + ioeventfd->region_addr; > + vfio_ioeventfd.fd = -1; > + > + ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd); > + > event_notifier_cleanup(&ioeventfd->e); > g_free(ioeventfd); > } > @@ -315,6 +330,8 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, > hwaddr region_addr) > { > VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd)); > + struct vfio_device_ioeventfd vfio_ioeventfd; > + char vfio_enabled = '+'; > > if (event_notifier_init(&ioeventfd->e, 0)) { > g_free(ioeventfd); > @@ -329,15 +346,28 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, > ioeventfd->region = region; > ioeventfd->region_addr = region_addr; > > - qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), > - vfio_ioeventfd_handler, NULL, ioeventfd); > + vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd); > + vfio_ioeventfd.flags = ioeventfd->size; > + vfio_ioeventfd.data = ioeventfd->data; > + vfio_ioeventfd.offset = ioeventfd->region->fd_offset + > + ioeventfd->region_addr; > + vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e); > + > + if (ioctl(vdev->vbasedev.fd, > + VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) { > + qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), > + vfio_ioeventfd_handler, NULL, ioeventfd); > + vfio_enabled = '-'; > + } > + > memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, > ioeventfd->size, ioeventfd->match_data, > ioeventfd->data, &ioeventfd->e); > > info_report("Enabled automatic ioeventfd acceleration for %s region %d, " > - "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u", > - vdev->vbasedev.name, region->nr, region_addr, data, size); > + "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u, vfio%c", > + vdev->vbasedev.name, region->nr, region_addr, data, size, > + vfio_enabled); Not sure if this message is really helpful for the end-user to understand what happens. Maybe adding a trace event when everything happens as it should and an error_report if we failed setting up the vfio kernel handler, explaining the sub-optimal performance that can result. Thanks Eric > > return ioeventfd; > } > @@ -1767,7 +1797,7 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) > > QLIST_FOREACH(quirk, &bar->quirks, next) { > while (!QLIST_EMPTY(&quirk->ioeventfds)) { > - vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds)); > + vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds)); > } > > for (i = 0; i < quirk->nr_mem; i++) { >