s390x PCI devices need to use a special KVM-managed IOMMU domain as part of zPCI interpretation. To facilitate this, let a vfio device indicate that it wishes to use a KVM-managed IOMMU so that it can be reflected by the group and, ultimately, trigger a KVM-managed argument for the VFIO_SET_IOMMU ioctl. This patch sets up the framework to allow a device to trigger the VFIO_SET_IOMMU with the new KVM-owned type. A subsequent patch will add exploitation by s390x PCI. Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx> --- hw/vfio/ap.c | 2 +- hw/vfio/ccw.c | 2 +- hw/vfio/common.c | 26 +++++++++++++++++++++----- hw/vfio/pci.c | 3 ++- hw/vfio/pci.h | 1 + hw/vfio/platform.c | 2 +- include/hw/vfio/vfio-common.h | 4 +++- 7 files changed, 30 insertions(+), 10 deletions(-) diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index e0dd561e85..22c402771a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -81,7 +81,7 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) g_free(group_path); - return vfio_get_group(groupid, &address_space_memory, errp); + return vfio_get_group(groupid, &address_space_memory, false, errp); } static void vfio_ap_realize(DeviceState *dev, Error **errp) diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 0354737666..08b0af5897 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -650,7 +650,7 @@ static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) return NULL; } - return vfio_get_group(groupid, &address_space_memory, errp); + return vfio_get_group(groupid, &address_space_memory, false, errp); } static void vfio_ccw_realize(DeviceState *dev, Error **errp) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 080046e3f5..227880bf84 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1873,7 +1873,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, return -EINVAL; } -static int vfio_init_container(VFIOContainer *container, int group_fd, +static int vfio_init_container(VFIOContainer *container, VFIOGroup *group, 
Error **errp) { int iommu_type, ret; @@ -1883,12 +1883,20 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, return iommu_type; } - ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd); + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd); if (ret) { error_setg_errno(errp, errno, "Failed to set group container"); return -errno; } + /* + * In the case where KVM will manage the IOMMU, we must instruct the host + * IOMMU to use the appropriate domain ops + */ + if (group->kvm_managed_iommu) { + iommu_type = VFIO_KVM_IOMMU; + } + while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) { if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { /* @@ -2062,7 +2070,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); - ret = vfio_init_container(container, group->fd, errp); + ret = vfio_init_container(container, group, errp); if (ret) { goto free_container_exit; } @@ -2265,7 +2273,8 @@ static void vfio_disconnect_container(VFIOGroup *group) } } -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, bool kvm_managed_iommu, + Error **errp) { VFIOGroup *group; char path[32]; @@ -2273,7 +2282,13 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) QLIST_FOREACH(group, &vfio_group_list, next) { if (group->groupid == groupid) { - /* Found it. Now is it already in the right context? */ + /* Found it. Ensure using same IOMMU type */ + if (group->kvm_managed_iommu != kvm_managed_iommu) { + error_setg(errp, "group %d using conflicting iommu ops", + group->groupid); + return NULL; + } + /* Is it already in the right context? 
*/ if (group->container->space->as == as) { return group; } else { @@ -2307,6 +2322,7 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) } group->groupid = groupid; + group->kvm_managed_iommu = kvm_managed_iommu; QLIST_INIT(&group->device_list); if (vfio_connect_container(group, as, errp)) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7b45353ce2..80f7e2880a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2855,7 +2855,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) trace_vfio_realize(vdev->vbasedev.name, groupid); - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp); + group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), + vdev->kvm_managed_iommu, errp); if (!group) { goto error; } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 64777516d1..f74524384c 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -171,6 +171,7 @@ struct VFIOPCIDevice { bool no_kvm_ioeventfd; bool no_vfio_ioeventfd; bool enable_ramfb; + bool kvm_managed_iommu; VFIODisplay *dpy; Notifier irqchip_change_notifier; }; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index f8f08a0f36..08793401dd 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -577,7 +577,7 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) trace_vfio_platform_base_device_init(vbasedev->name, groupid); - group = vfio_get_group(groupid, &address_space_memory, errp); + group = vfio_get_group(groupid, &address_space_memory, false, errp); if (!group) { return -ENOENT; } diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 8af11b0a76..37aa6ca162 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -162,6 +162,7 @@ typedef struct VFIOGroup { QLIST_ENTRY(VFIOGroup) next; QLIST_ENTRY(VFIOGroup) container_next; bool ram_block_discard_allowed; + bool kvm_managed_iommu; } VFIOGroup; typedef struct VFIODMABuf { @@ -208,7 +209,8 @@ void vfio_region_unmap(VFIORegion 
*region); void vfio_region_exit(VFIORegion *region); void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, bool kvm_managed_iommu, + Error **errp); void vfio_put_group(VFIOGroup *group); int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vbasedev, Error **errp); -- 2.27.0