On Mon, 16 Nov 2020 12:00:18 +0100 Eric Auger <eric.auger@xxxxxxxxxx> wrote: > From: "Liu, Yi L" <yi.l.liu@xxxxxxxxxxxxxxx> > > This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl > which aims to pass the virtual iommu guest configuration > to the host. This latter takes the form of the so-called > PASID table. > > Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx> > Signed-off-by: Liu, Yi L <yi.l.liu@xxxxxxxxxxxxxxx> > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> > > --- > v11 -> v12: > - use iommu_uapi_set_pasid_table > - check SET and UNSET are not set simultaneously (Zenghui) > > v8 -> v9: > - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single > VFIO_IOMMU_SET_PASID_TABLE ioctl. > > v6 -> v7: > - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE > > v3 -> v4: > - restore ATTACH/DETACH > - add unwind on failure > > v2 -> v3: > - s/BIND_PASID_TABLE/SET_PASID_TABLE > > v1 -> v2: > - s/BIND_GUEST_STAGE/BIND_PASID_TABLE > - remove the struct device arg > --- > drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++ > include/uapi/linux/vfio.h | 19 ++++++++++ > 2 files changed, 84 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 67e827638995..87ddd9e882dc 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, > return ret; > } > > +static void > +vfio_detach_pasid_table(struct vfio_iommu *iommu) > +{ > + struct vfio_domain *d; > + > + mutex_lock(&iommu->lock); > + list_for_each_entry(d, &iommu->domain_list, next) > + iommu_detach_pasid_table(d->domain); > + > + mutex_unlock(&iommu->lock); > +} > + > +static int > +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg) > +{ > + struct vfio_domain *d; > + int ret = 0; > + > + mutex_lock(&iommu->lock); > + > + list_for_each_entry(d, &iommu->domain_list, next) { > + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg); > + if (ret) > + goto unwind; > + } > + goto unlock; > +unwind: > + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) { > + iommu_detach_pasid_table(d->domain); > + } > +unlock: This goto leap frog could be avoided with just: list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg); if (ret) { list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) { iommu_detach_pasid_table(d->domain); } break; } } > + mutex_unlock(&iommu->lock); > + return ret; > +} > + > static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, > struct vfio_info_cap *caps) > { > @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, > -EFAULT : 0; > } > > +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu, > + unsigned long arg) > +{ > + struct vfio_iommu_type1_set_pasid_table spt; > + unsigned long minsz; > + int ret = -EINVAL; > + > + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags); > + > + if (copy_from_user(&spt, (void __user *)arg, minsz)) > + return -EFAULT; > + > + if (spt.argsz < minsz) > + return -EINVAL; > + > + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET && > + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) > + return -EINVAL; > + > + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET) > + ret = vfio_attach_pasid_table(iommu, arg + minsz); > + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) { > + vfio_detach_pasid_table(iommu); > + ret = 0; > + } This doesn't really validate that the other flag bits are zero, ex. user could pass flags = (1 << 8) | VFIO_PASID_TABLE_FLAG_SET and we'd just ignore the extra bit. So this probably needs to be: if (spt.flags == VFIO_PASID_TABLE_FLAG_SET) ret = vfio_attach_pasid_table(iommu, arg + minsz); else if (spt.flags == VFIO_PASID_TABLE_FLAG_UNSET) vfio_detach_pasid_table(iommu); Or otherwise validate that none of the other bits are set. It also seems cleaner to me to set the initial value of ret = 0 and end this with: else ret = -EINVAL; > + return ret; > +} > + > static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, > unsigned long arg) > { > @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > return vfio_iommu_type1_unmap_dma(iommu, arg); > case VFIO_IOMMU_DIRTY_PAGES: > return vfio_iommu_type1_dirty_pages(iommu, arg); > + case VFIO_IOMMU_SET_PASID_TABLE: > + return vfio_iommu_type1_set_pasid_table(iommu, arg); > default: > return -ENOTTY; > } > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index 2f313a238a8f..78ce3ce6c331 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -14,6 +14,7 @@ > > #include <linux/types.h> > #include <linux/ioctl.h> > +#include <linux/iommu.h> > > #define VFIO_API_VERSION 0 > > @@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get { > > #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) > > +/* > + * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22, We already reuse ioctl indexes between type1 and spapr (ex. +17 is either VFIO_IOMMU_DIRTY_PAGES or VFIO_IOMMU_SPAPR_REGISTER_MEMORY depending on the iommu type). I wonder if we should reuse +18 here instead. > + * struct vfio_iommu_type1_set_pasid_table) > + * > + * The SET operation passes a PASID table to the host while the > + * UNSET operation detaches the one currently programmed. Setting > + * a table while another is already programmed replaces the old table. > + */ > +struct vfio_iommu_type1_set_pasid_table { > + __u32 argsz; > + __u32 flags; > +#define VFIO_PASID_TABLE_FLAG_SET (1 << 0) > +#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1) > + struct iommu_pasid_table_config config; /* used on SET */ > +}; > + > +#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22) > + > /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ > > /*