The existing IOMMU requires VFIO_IOMMU_ENABLE call to enable actual use of the container (i.e. call DMA map/unmap) and this is where we check the rlimit for locked pages. It assumes that only as much memory as a default DMA window can be mapped. Every DMA map/unmap request will do pinning/unpinning of physical pages. New IOMMU will split physical pages pinning and TCE table update. It will require guest pages to be registered first and consequent map/unmap requests to work only with pre-registered memory. For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. However when a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx> --- Changes: v6: * enforced limitations imposed by the SPAPR TCE IOMMU version --- drivers/vfio/vfio_iommu_spapr_tce.c | 18 +++++++++++++++++- include/uapi/linux/vfio.h | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 9d240b4..e191438 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -95,6 +95,7 @@ struct tce_container { bool enabled; unsigned long locked_pages; struct list_head mem_list; + bool v2; }; struct tce_memory { @@ -398,7 +399,7 @@ static void *tce_iommu_open(unsigned long arg) { struct tce_container *container; - if (arg != VFIO_SPAPR_TCE_IOMMU) { + if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) { pr_err("tce_vfio: Wrong IOMMU type\n"); return ERR_PTR(-EINVAL); } @@ -410,6 +411,8 @@ static void *tce_iommu_open(unsigned long arg) mutex_init(&container->lock); INIT_LIST_HEAD_RCU(&container->mem_list); + container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; + return container; } @@ -580,6 +583,7 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_CHECK_EXTENSION: switch (arg) { case VFIO_SPAPR_TCE_IOMMU: + case VFIO_SPAPR_TCE_v2_IOMMU: ret = 1; break; default: @@ -719,6 +723,9 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: { struct vfio_iommu_spapr_register_memory param; + if (!container->v2) + return -EPERM; + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, size); @@ -741,6 +748,9 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: { struct vfio_iommu_spapr_register_memory param; + if (!container->v2) + return -EPERM; + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, size); @@ -761,6 +771,9 @@ static long tce_iommu_ioctl(void *iommu_data, return 0; } case VFIO_IOMMU_ENABLE: + if (container->v2) + return -EPERM; + mutex_lock(&container->lock); ret = tce_iommu_enable(container); mutex_unlock(&container->lock); @@ -768,6 +781,9 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_DISABLE: + if (container->v2) + return -EPERM; + mutex_lock(&container->lock); tce_iommu_disable(container); mutex_unlock(&container->lock); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index b17e120..fbc5286 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between -- 2.0.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html