> From: David Gibson [mailto:david@xxxxxxxxxxxxxxxxxxxxx] > Sent: Tuesday, October 29, 2019 8:16 PM > To: Liu, Yi L <yi.l.liu@xxxxxxxxx> > Subject: Re: [RFC v2 09/22] vfio/pci: add iommu_context notifier for pasid alloc/free > > On Thu, Oct 24, 2019 at 08:34:30AM -0400, Liu Yi L wrote: > > This patch adds pasid alloc/free notifiers for vfio-pci. It is > > supposed to be fired by vIOMMU. VFIO then sends PASID allocation > > or free request to host. > > > > Cc: Kevin Tian <kevin.tian@xxxxxxxxx> > > Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx> > > Cc: Peter Xu <peterx@xxxxxxxxxx> > > Cc: Eric Auger <eric.auger@xxxxxxxxxx> > > Cc: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx> > > Cc: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx> > > Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx> > > --- > > hw/vfio/common.c | 9 ++++++ > > hw/vfio/pci.c | 81 > ++++++++++++++++++++++++++++++++++++++++++++++++ > > include/hw/iommu/iommu.h | 15 +++++++++ > > 3 files changed, 105 insertions(+) > > > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > > index d418527..e6ad21c 100644 > > --- a/hw/vfio/common.c > > +++ b/hw/vfio/common.c > > @@ -1436,6 +1436,7 @@ static void vfio_disconnect_container(VFIOGroup > *group) > > if (QLIST_EMPTY(&container->group_list)) { > > VFIOAddressSpace *space = container->space; > > VFIOGuestIOMMU *giommu, *tmp; > > + VFIOIOMMUContext *giommu_ctx, *ctx; > > > > QLIST_REMOVE(container, next); > > > > @@ -1446,6 +1447,14 @@ static void vfio_disconnect_container(VFIOGroup > *group) > > g_free(giommu); > > } > > > > + QLIST_FOREACH_SAFE(giommu_ctx, &container->iommu_ctx_list, > > + iommu_ctx_next, ctx) { > > + iommu_ctx_notifier_unregister(giommu_ctx->iommu_ctx, > > + &giommu_ctx->n); > > + QLIST_REMOVE(giommu_ctx, iommu_ctx_next); > > + g_free(giommu_ctx); > > + } > > + > > trace_vfio_disconnect_container(container->fd); > > close(container->fd); > > g_free(container); > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index 12fac39..8721ff6 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -2699,11 +2699,80 @@ static void > vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > > vdev->req_enabled = false; > > } > > > > +static void vfio_register_iommu_ctx_notifier(VFIOPCIDevice *vdev, > > + IOMMUContext *iommu_ctx, > > + IOMMUCTXNotifyFn fn, > > + IOMMUCTXEvent event) > > +{ > > + VFIOContainer *container = vdev->vbasedev.group->container; > > + VFIOIOMMUContext *giommu_ctx; > > + > > + giommu_ctx = g_malloc0(sizeof(*giommu_ctx)); > > + giommu_ctx->container = container; > > + giommu_ctx->iommu_ctx = iommu_ctx; > > + QLIST_INSERT_HEAD(&container->iommu_ctx_list, > > + giommu_ctx, > > + iommu_ctx_next); > > + iommu_ctx_notifier_register(iommu_ctx, > > + &giommu_ctx->n, > > + fn, > > + event); > > +} > > + > > +static void vfio_iommu_pasid_alloc_notify(IOMMUCTXNotifier *n, > > + IOMMUCTXEventData *event_data) > > +{ > > + VFIOIOMMUContext *giommu_ctx = container_of(n, VFIOIOMMUContext, n); > > + VFIOContainer *container = giommu_ctx->container; > > + IOMMUCTXPASIDReqDesc *pasid_req = > > + (IOMMUCTXPASIDReqDesc *) event_data->data; > > + struct vfio_iommu_type1_pasid_request req; > > + unsigned long argsz; > > + int pasid; > > + > > + argsz = sizeof(req); > > + req.argsz = argsz; > > + req.flag = VFIO_IOMMU_PASID_ALLOC; > > + req.min_pasid = pasid_req->min_pasid; > > + req.max_pasid = pasid_req->max_pasid; > > + > > + pasid = ioctl(container->fd, VFIO_IOMMU_PASID_REQUEST, &req); > > + if (pasid < 0) { > > + error_report("%s: %d, alloc failed", __func__, -errno); > > + } > > + pasid_req->alloc_result = pasid; > > Altering the event data from the notifier doesn't make sense. By > definition there can be multiple notifiers on the chain, so in that > case which one is responsible for updating the writable field? I guess you mean multiple pasid_alloc nofitiers. right? It works for VT-d now, as Intel vIOMMU maintains the IOMMUContext per-bdf. And there will be only 1 pasid_alloc notifier in the chain. But, I agree it is not good if other module just share an IOMMUConext across devices. Definitely, it would have multiple pasid_alloc notifiers. How about enforcing IOMMUContext layer to only invoke one successful pasid_alloc/free notifier if PASID_ALLOC/FREE event comes? pasid alloc/free are really special as it requires feedback. And a potential benefit is that the pasid_alloc/free will not be affected by hot plug scenario. There will be always a notifier to work for pasid_alloc/free work unless all passthru devices are hot plugged. How do you think? Or if any other idea? > > +} > > + > > +static void vfio_iommu_pasid_free_notify(IOMMUCTXNotifier *n, > > + IOMMUCTXEventData *event_data) > > +{ > > + VFIOIOMMUContext *giommu_ctx = container_of(n, VFIOIOMMUContext, n); > > + VFIOContainer *container = giommu_ctx->container; > > + IOMMUCTXPASIDReqDesc *pasid_req = > > + (IOMMUCTXPASIDReqDesc *) event_data->data; > > + struct vfio_iommu_type1_pasid_request req; > > + unsigned long argsz; > > + int ret = 0; > > + > > + argsz = sizeof(req); > > + req.argsz = argsz; > > + req.flag = VFIO_IOMMU_PASID_FREE; > > + req.pasid = pasid_req->pasid; > > + > > + ret = ioctl(container->fd, VFIO_IOMMU_PASID_REQUEST, &req); > > + if (ret != 0) { > > + error_report("%s: %d, pasid %u free failed", > > + __func__, -errno, (unsigned) pasid_req->pasid); > > + } > > + pasid_req->free_result = ret; > > Same problem here. yep, as above proposal. > > +} > > + > > static void vfio_realize(PCIDevice *pdev, Error **errp) > > { > > VFIOPCIDevice *vdev = PCI_VFIO(pdev); > > VFIODevice *vbasedev_iter; > > VFIOGroup *group; > > + IOMMUContext *iommu_context; > > char *tmp, *subsys, group_path[PATH_MAX], *group_name; > > Error *err = NULL; > > ssize_t len; > > @@ -3000,6 +3069,18 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > > vfio_register_req_notifier(vdev); > > vfio_setup_resetfn_quirk(vdev); > > > > + iommu_context = pci_device_iommu_context(pdev); > > + if (iommu_context) { > > + vfio_register_iommu_ctx_notifier(vdev, > > + iommu_context, > > + vfio_iommu_pasid_alloc_notify, > > + IOMMU_CTX_EVENT_PASID_ALLOC); > > + vfio_register_iommu_ctx_notifier(vdev, > > + iommu_context, > > + vfio_iommu_pasid_free_notify, > > + IOMMU_CTX_EVENT_PASID_FREE); > > + } > > + > > return; > > > > out_teardown: > > diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h > > index c22c442..4352afd 100644 > > --- a/include/hw/iommu/iommu.h > > +++ b/include/hw/iommu/iommu.h > > @@ -31,10 +31,25 @@ > > typedef struct IOMMUContext IOMMUContext; > > > > enum IOMMUCTXEvent { > > + IOMMU_CTX_EVENT_PASID_ALLOC, > > + IOMMU_CTX_EVENT_PASID_FREE, > > IOMMU_CTX_EVENT_NUM, > > }; > > typedef enum IOMMUCTXEvent IOMMUCTXEvent; > > > > +union IOMMUCTXPASIDReqDesc { > > + struct { > > + uint32_t min_pasid; > > + uint32_t max_pasid; > > + int32_t alloc_result; /* pasid allocated for the alloc request */ > > + }; > > + struct { > > + uint32_t pasid; /* pasid to be free */ > > + int free_result; > > + }; > > +}; > > Apart from theproblem with writable fields, using a big union for > event data is pretty ugly. If you need this different information for > the different events, it might make more sense to have a separate > notifier chain with a separate call interface for each event type, > rather than trying to multiplex them together. sure, I'll de-couple them. Nice catch. Thanks, Yi Liu