On Fri, 10 May 2019 10:22:35 +0200 Pierre Morel <pmorel@xxxxxxxxxxxxx> wrote: > We implement a capability intercafe for VFIO_IOMMU_GET_INFO and add the > first capability: VFIO_IOMMU_INFO_CAPABILITIES. > > When calling the ioctl, the user must specify > VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check > in the answer if capabilities are supported. > Older kernel will not check nor set the VFIO_IOMMU_INFO_CAPABILITIES in > the flags of vfio_iommu_type1_info. > > The iommu get_attr callback will be called to retrieve the specific > attributes and fill the capabilities, VFIO_IOMMU_INFO_CAP_QFN for the > PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the > PCI query function group. > > Signed-off-by: Pierre Morel <pmorel@xxxxxxxxxxxxx> > --- > drivers/vfio/vfio_iommu_type1.c | 95 ++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 94 insertions(+), 1 deletion(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index d0f731c..f7f8120 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1658,6 +1658,70 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) > return ret; > } > > +int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps, > + size_t size) > +{ > + struct vfio_domain *d; > + struct vfio_iommu_type1_info_block *info_fn; > + struct vfio_iommu_type1_info_block *info_grp; > + unsigned long total_size, fn_size, grp_size; > + int ret; > + > + d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); > + if (!d) > + return -ENODEV; > + /* The size of these capabilities are device dependent */ > + fn_size = iommu_domain_get_attr(d->domain, > + DOMAIN_ATTR_ZPCI_FN_SIZE, NULL); > + if (fn_size < 0) > + return fn_size; What if non-Z archs want to use this? The function is architected specifically for this one use case, fail if any component is not there which means it requires a re-write to add further support. If ZPCI_FN_SIZE isn't support, move on to the next thing. > + fn_size += sizeof(struct vfio_info_cap_header); > + total_size = fn_size; Here too, total_size should be initialized to zero and each section += the size they'd like to add. > + > + grp_size = iommu_domain_get_attr(d->domain, > + DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL); > + if (grp_size < 0) > + return grp_size; > + grp_size += sizeof(struct vfio_info_cap_header); > + total_size += grp_size; > + > + /* Tell caller to call us with a greater buffer */ > + if (total_size > size) { > + caps->size = total_size; > + return 0; > + } > + > + info_fn = kzalloc(fn_size, GFP_KERNEL); > + if (!info_fn) > + return -ENOMEM; Maybe fn_size was zero because we're not on Z. > + ret = iommu_domain_get_attr(d->domain, > + DOMAIN_ATTR_ZPCI_FN, &info_fn->data); Kernel internal structures != user api. Thanks, Alex > + if (ret < 0) > + return ret; > + > + info_fn->header.id = VFIO_IOMMU_INFO_CAP_QFN; > + > + ret = vfio_info_add_capability(caps, &info_fn->header, fn_size); > + if (ret) > + goto err_fn; > + > + info_grp = kzalloc(grp_size, GFP_KERNEL); > + if (!info_grp) > + goto err_fn; > + ret = iommu_domain_get_attr(d->domain, > + DOMAIN_ATTR_ZPCI_GRP, &info_grp->data); > + if (ret < 0) > + goto err_grp; > + info_grp->header.id = VFIO_IOMMU_INFO_CAP_QGRP; > + ret = vfio_info_add_capability(caps, &info_grp->header, grp_size); > + > +err_grp: > + kfree(info_grp); > +err_fn: > + kfree(info_fn); > + return ret; > +} > + > static long vfio_iommu_type1_ioctl(void *iommu_data, > unsigned int cmd, unsigned long arg) > { > @@ -1679,6 +1743,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > } > } else if (cmd == VFIO_IOMMU_GET_INFO) { > struct vfio_iommu_type1_info info; > + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; > + int ret; > > minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); > > @@ -1688,7 +1754,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > if (info.argsz < minsz) > return -EINVAL; > > - info.flags = VFIO_IOMMU_INFO_PGSIZES; > + if (info.flags & VFIO_IOMMU_INFO_CAPABILITIES) { > + minsz = offsetofend(struct vfio_iommu_type1_info, > + cap_offset); > + if (info.argsz < minsz) > + return -EINVAL; > + ret = vfio_iommu_type1_caps(iommu, &caps, > + info.argsz - sizeof(info)); > + if (ret) > + return ret; > + } > + if (caps.size) { > + if (info.argsz < sizeof(info) + caps.size) { > + info.argsz = sizeof(info) + caps.size; > + info.cap_offset = 0; > + } else { > + if (copy_to_user((void __user *)arg + > + sizeof(info), caps.buf, > + caps.size)) { > + kfree(caps.buf); > + return -EFAULT; > + } > + > + info.cap_offset = sizeof(info); > + } > + kfree(caps.buf); > + } > + > + info.flags |= VFIO_IOMMU_INFO_PGSIZES; > > info.iova_pgsizes = vfio_pgsize_bitmap(iommu); >