Hi Pierre, > -----Original Message----- > From: Shameerali Kolothum Thodi > Sent: Wednesday, December 06, 2017 4:08 PM > To: alex.williamson@xxxxxxxxxx; eric.auger@xxxxxxxxxx; > pmorel@xxxxxxxxxxxxxxxxxx > Cc: kvm@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Linuxarm > <linuxarm@xxxxxxxxxx>; Shameerali Kolothum Thodi > <shameerali.kolothum.thodi@xxxxxxxxxx> > Subject: [RFC] vfio/type1: Add IOVA_RANGE capability support > > This patch allows the user-space to retrieve the supported > IOVA range(s), excluding any reserved regions. The implementation > is based on capability chains, added to the VFIO_IOMMU_GET_INFO ioctl. > > This is following the discussions here[1] and is based on the RFC patch[2]. > > ToDo: > - This currently derives the default supported iova range from the first > iommu domain. This needs to be changed to go through the domain_list > instead. > - Sync with Pierre's patch[3]. Thanks to Eric[1], I came to know that you have posted a patch to retrieve the iommu aperture info. This RFC does a similar thing, but tries to take care of any reserved regions and adds the ranges to the capability chain. Please take a look, and if there is a possibility to sync up your next revision with this one, please let me know. Thanks, Shameer 1. 
https://patchwork.kernel.org/patch/10056967/ > 1.https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html > 2.https://lists.linuxfoundation.org/pipermail/iommu/2016- > November/019002.html > 3.https://patchwork.kernel.org/patch/10084655/ > > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@xxxxxxxxxx> > --- > drivers/vfio/vfio_iommu_type1.c | 172 > +++++++++++++++++++++++++++++++++++++++- > include/uapi/linux/vfio.h | 13 +++ > 2 files changed, 184 insertions(+), 1 deletion(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index e30e29a..72ca78a 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -28,6 +28,7 @@ > #include <linux/device.h> > #include <linux/fs.h> > #include <linux/iommu.h> > +#include <linux/list_sort.h> > #include <linux/module.h> > #include <linux/mm.h> > #include <linux/rbtree.h> > @@ -92,6 +93,12 @@ struct vfio_group { > struct list_head next; > }; > > +struct vfio_iommu_iova { > + struct list_head list; > + phys_addr_t start; > + phys_addr_t end; > +}; > + > /* > * Guest RAM pinning working set or DMA target > */ > @@ -1537,6 +1544,144 @@ static int vfio_domains_have_iommu_cache(struct > vfio_iommu *iommu) > return ret; > } > > +static int vfio_add_iova_cap(struct vfio_info_cap *caps, u64 start, u64 end) > +{ > + struct vfio_iommu_type1_info_cap_iova_range *cap; > + struct vfio_info_cap_header *header; > + > + header = vfio_info_cap_add(caps, sizeof(*cap), > + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1); > + if (IS_ERR(header)) > + return PTR_ERR(header); > + > + cap = container_of(header, > + struct vfio_iommu_type1_info_cap_iova_range, > + header); > + > + cap->start = start; > + cap->end = end; > + > + return 0; > +} > + > +static int vfio_insert_iova(phys_addr_t start, phys_addr_t end, > + struct list_head *head) > +{ > + struct vfio_iommu_iova *region; > + > + region = kzalloc(sizeof(*region), GFP_KERNEL); > + if (!region) > + return -ENOMEM; > + 
> + INIT_LIST_HEAD(&region->list); > + region->start = start; > + region->end = end; > + > + list_add_tail(&region->list, head); > + return 0; > +} > + > +/* > + * Check and update iova region list in case a reserved region > + * overlaps the iommu iova range. > + */ > +static int vfio_update_iommu_iova_range(phys_addr_t start, phys_addr_t end, > + struct list_head *iova) > +{ > + struct vfio_iommu_iova *node; > + phys_addr_t a, b; > + int ret = 0; > + > + if (list_empty(iova)) > + return -ENODEV; > + > + node = list_last_entry(iova, struct vfio_iommu_iova, list); > + a = node->start; > + b = node->end; > + > + /* No overlap */ > + if ((start > b) || (end < a)) > + return 0; > + > + if (start > a) > + ret = vfio_insert_iova(a, start - 1, &node->list); > + if (ret) > + goto done; > + if (end < b) > + ret = vfio_insert_iova(end + 1, b, &node->list); > + > +done: > + list_del(&node->list); > + kfree(node); > + > + return ret; > +} > + > +static int vfio_resv_cmp(void *priv, struct list_head *a, struct list_head *b) > +{ > + struct iommu_resv_region *ra, *rb; > + > + ra = container_of(a, struct iommu_resv_region, list); > + rb = container_of(b, struct iommu_resv_region, list); > + > + if (ra->start < rb->start) > + return -1; > + if (ra->start > rb->start) > + return 1; > + return 0; > +} > + > +static int vfio_build_iommu_iova_caps(struct vfio_iommu *iommu, > + struct vfio_info_cap *caps) > +{ > + struct iommu_resv_region *resv, *resv_next; > + struct vfio_iommu_iova *iova, *iova_next; > + struct list_head group_resv_regions, vfio_iova_regions; > + struct vfio_domain *domain; > + struct vfio_group *g; > + phys_addr_t start, end; > + int ret = 0; > + > + domain = list_first_entry(&iommu->domain_list, > + struct vfio_domain, next); > + /* Get the default iova range supported */ > + start = domain->domain->geometry.aperture_start; > + end = domain->domain->geometry.aperture_end; > + INIT_LIST_HEAD(&vfio_iova_regions); > + vfio_insert_iova(start, end, &vfio_iova_regions); > + > 
+ /* Get reserved regions if any */ > + INIT_LIST_HEAD(&group_resv_regions); > + list_for_each_entry(g, &domain->group_list, next) > + iommu_get_group_resv_regions(g->iommu_group, > + &group_resv_regions); > + list_sort(NULL, &group_resv_regions, vfio_resv_cmp); > + > + /* Update iova range excluding reserved regions */ > + list_for_each_entry(resv, &group_resv_regions, list) { > + ret = vfio_update_iommu_iova_range(resv->start, > + resv->start + resv->length - 1, > + &vfio_iova_regions); > + if (ret) > + goto done; > + } > + > + list_for_each_entry(iova, &vfio_iova_regions, list) { > + ret = vfio_add_iova_cap(caps, iova->start, iova->end); > + if (ret) > + goto done; > + } > + > +done: > + list_for_each_entry_safe(resv, resv_next, &group_resv_regions, list) > + kfree(resv); > + > + list_for_each_entry_safe(iova, iova_next, &vfio_iova_regions, list) > + kfree(iova); > + > + return ret; > +} > + > static long vfio_iommu_type1_ioctl(void *iommu_data, > unsigned int cmd, unsigned long arg) > { > @@ -1558,8 +1703,10 @@ static long vfio_iommu_type1_ioctl(void > *iommu_data, > } > } else if (cmd == VFIO_IOMMU_GET_INFO) { > struct vfio_iommu_type1_info info; > + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; > + int ret; > > - minsz = offsetofend(struct vfio_iommu_type1_info, > iova_pgsizes); > + minsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); > > if (copy_from_user(&info, (void __user *)arg, minsz)) > return -EFAULT; > @@ -1571,6 +1718,29 @@ static long vfio_iommu_type1_ioctl(void > *iommu_data, > > info.iova_pgsizes = vfio_pgsize_bitmap(iommu); > > + ret = vfio_build_iommu_iova_caps(iommu, &caps); > + if (ret) > + return ret; > + > + if (caps.size) { > + info.flags |= VFIO_IOMMU_INFO_CAPS; > + if (info.argsz < sizeof(info) + caps.size) { > + info.argsz = sizeof(info) + caps.size; > + info.cap_offset = 0; > + } else { > + vfio_info_cap_shift(&caps, sizeof(info)); > + if (copy_to_user((void __user *)arg + > + sizeof(info), caps.buf, > + caps.size)) 
{ > + kfree(caps.buf); > + return -EFAULT; > + } > + info.cap_offset = sizeof(info); > + } > + > + kfree(caps.buf); > + } > + > return copy_to_user((void __user *)arg, &info, minsz) ? > -EFAULT : 0; > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index e3301db..c4e338b 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -517,7 +517,20 @@ struct vfio_iommu_type1_info { > __u32 argsz; > __u32 flags; > #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ > +#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ > __u64 iova_pgsizes; /* Bitmap of supported page sizes */ > + __u32 cap_offset; /* Offset within info struct of first cap */ > + __u32 __resv; > +}; > + > +/* > + * The IOVA_RANGE capability allows to report the IOVA range(s), > + */ > +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 > +struct vfio_iommu_type1_info_cap_iova_range { > + struct vfio_info_cap_header header; > + __u64 start; > + __u64 end; > }; > > #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) > -- > 1.9.1 >