ping @Eric Farman. ccw is the only tricky player in this series. Please help take a look in case of any oversight here. > From: Tian, Kevin <kevin.tian@xxxxxxxxx> > Sent: Thursday, September 1, 2022 10:38 PM > > ccw is the only exception which cannot use vfio_alloc_device() because > its private device structure is designed to serve both mdev and parent. > Life cycle of the parent is managed by css_driver so vfio_ccw_private > must be allocated/freed in css_driver probe/remove path instead of > conforming to vfio core life cycle for mdev. > > Given that use a wait/completion scheme so the mdev remove path waits > after vfio_put_device() until receiving a completion notification from > @release. The completion indicates that all active references on > vfio_device have been released. > > After that point although free of vfio_ccw_private is delayed to > css_driver it's at least guaranteed to have no parallel reference on > released vfio device part from other code paths. > > memset() in @probe is removed. vfio_device is either already cleared > when probed for the first time or cleared in @release from last probe. > > The right fix is to introduce separate structures for mdev and parent, > but this won't happen in short term per prior discussions. > > Remove vfio_init/uninit_group_dev() as no user now. > > Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> > Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > --- > drivers/s390/cio/vfio_ccw_ops.c | 52 +++++++++++++++++++++++++---- > drivers/s390/cio/vfio_ccw_private.h | 3 ++ > drivers/vfio/vfio_main.c | 23 +++---------- > include/linux/vfio.h | 3 -- > 4 files changed, 53 insertions(+), 28 deletions(-) > > diff --git a/drivers/s390/cio/vfio_ccw_ops.c > b/drivers/s390/cio/vfio_ccw_ops.c > index 4a806a2273b5..9f8486c0d3d3 100644 > --- a/drivers/s390/cio/vfio_ccw_ops.c > +++ b/drivers/s390/cio/vfio_ccw_ops.c > @@ -87,6 +87,15 @@ static struct attribute_group *mdev_type_groups[] = { > NULL, > }; > > +static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev) > +{ > + struct vfio_ccw_private *private = > + container_of(vdev, struct vfio_ccw_private, vdev); > + > + init_completion(&private->release_comp); > + return 0; > +} > + > static int vfio_ccw_mdev_probe(struct mdev_device *mdev) > { > struct vfio_ccw_private *private = dev_get_drvdata(mdev- > >dev.parent); > @@ -98,9 +107,9 @@ static int vfio_ccw_mdev_probe(struct mdev_device > *mdev) > if (atomic_dec_if_positive(&private->avail) < 0) > return -EPERM; > > - memset(&private->vdev, 0, sizeof(private->vdev)); > - vfio_init_group_dev(&private->vdev, &mdev->dev, > - &vfio_ccw_dev_ops); > + ret = vfio_init_device(&private->vdev, &mdev->dev, > &vfio_ccw_dev_ops); > + if (ret) > + return ret; > > VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", > private->sch->schid.cssid, > @@ -109,16 +118,33 @@ static int vfio_ccw_mdev_probe(struct > mdev_device *mdev) > > ret = vfio_register_emulated_iommu_dev(&private->vdev); > if (ret) > - goto err_atomic; > + goto err_put_vdev; > dev_set_drvdata(&mdev->dev, private); > return 0; > > -err_atomic: > - vfio_uninit_group_dev(&private->vdev); > +err_put_vdev: > + vfio_put_device(&private->vdev); > atomic_inc(&private->avail); > return ret; > } > > +static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) > +{ > + struct vfio_ccw_private *private = > + container_of(vdev, struct vfio_ccw_private, vdev); > + > + /* > + * We cannot free vfio_ccw_private here because it includes > + * parent info which must be free'ed by css driver. > + * > + * Use a workaround by memset'ing the core device part and > + * then notifying the remove path that all active references > + * to this device have been released. > + */ > + memset(vdev, 0, sizeof(*vdev)); > + complete(&private->release_comp); > +} > + > static void vfio_ccw_mdev_remove(struct mdev_device *mdev) > { > struct vfio_ccw_private *private = dev_get_drvdata(mdev- > >dev.parent); > @@ -130,7 +156,17 @@ static void vfio_ccw_mdev_remove(struct > mdev_device *mdev) > > vfio_unregister_group_dev(&private->vdev); > > - vfio_uninit_group_dev(&private->vdev); > + vfio_put_device(&private->vdev); > + /* > + * Wait for all active references on mdev are released so it > + * is safe to defer kfree() to a later point. > + * > + * TODO: the clean fix is to split parent/mdev info from ccw > + * private structure so each can be managed in its own life > + * cycle. > + */ > + wait_for_completion(&private->release_comp); > + > atomic_inc(&private->avail); > } > > @@ -592,6 +628,8 @@ static void vfio_ccw_mdev_request(struct vfio_device > *vdev, unsigned int count) > } > > static const struct vfio_device_ops vfio_ccw_dev_ops = { > + .init = vfio_ccw_mdev_init_dev, > + .release = vfio_ccw_mdev_release_dev, > .open_device = vfio_ccw_mdev_open_device, > .close_device = vfio_ccw_mdev_close_device, > .read = vfio_ccw_mdev_read, > diff --git a/drivers/s390/cio/vfio_ccw_private.h > b/drivers/s390/cio/vfio_ccw_private.h > index cd24b7fada91..63d9202b29c7 100644 > --- a/drivers/s390/cio/vfio_ccw_private.h > +++ b/drivers/s390/cio/vfio_ccw_private.h > @@ -88,6 +88,7 @@ struct vfio_ccw_crw { > * @req_trigger: eventfd ctx for signaling userspace to return device > * @io_work: work for deferral process of I/O handling > * @crw_work: work for deferral process of CRW handling > + * @release_comp: synchronization helper for vfio device release > */ > struct vfio_ccw_private { > struct vfio_device vdev; > @@ -113,6 +114,8 @@ struct vfio_ccw_private { > struct eventfd_ctx *req_trigger; > struct work_struct io_work; > struct work_struct crw_work; > + > + struct completion release_comp; > } __aligned(8); > > int vfio_ccw_sch_quiesce(struct subchannel *sch); > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > index c9d982131265..957d9f286550 100644 > --- a/drivers/vfio/vfio_main.c > +++ b/drivers/vfio/vfio_main.c > @@ -481,28 +481,13 @@ static struct vfio_device > *vfio_group_get_device(struct vfio_group *group, > /* > * VFIO driver API > */ > -void vfio_init_group_dev(struct vfio_device *device, struct device *dev, > - const struct vfio_device_ops *ops) > -{ > - init_completion(&device->comp); > - device->dev = dev; > - device->ops = ops; > -} > -EXPORT_SYMBOL_GPL(vfio_init_group_dev); > - > -void vfio_uninit_group_dev(struct vfio_device *device) > -{ > - vfio_release_device_set(device); > -} > -EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); > - > /* Release helper called by vfio_put_device() */ > void vfio_device_release(struct kref *kref) > { > struct vfio_device *device = > container_of(kref, struct vfio_device, kref); > > - vfio_uninit_group_dev(device); > + vfio_release_device_set(device); > > /* > * kvfree() cannot be done here due to a life cycle mess in > @@ -560,7 +545,9 @@ int vfio_init_device(struct vfio_device *device, struct > device *dev, > { > int ret; > > - vfio_init_group_dev(device, dev, ops); > + init_completion(&device->comp); > + device->dev = dev; > + device->ops = ops; > > if (ops->init) { > ret = ops->init(device); > @@ -572,7 +559,7 @@ int vfio_init_device(struct vfio_device *device, struct > device *dev, > return 0; > > out_uninit: > - vfio_uninit_group_dev(device); > + vfio_release_device_set(device); > return ret; > } > EXPORT_SYMBOL_GPL(vfio_init_device); > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index e1e9e8352903..f03447c8774d 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -160,9 +160,6 @@ static inline void vfio_put_device(struct vfio_device > *device) > kref_put(&device->kref, vfio_device_release); > } > > -void vfio_init_group_dev(struct vfio_device *device, struct device *dev, > - const struct vfio_device_ops *ops); > -void vfio_uninit_group_dev(struct vfio_device *device); > int vfio_register_group_dev(struct vfio_device *device); > int vfio_register_emulated_iommu_dev(struct vfio_device *device); > void vfio_unregister_group_dev(struct vfio_device *device); > -- > 2.21.3