Re: [PATCH v2 02/14] vfio: Simplify the lifetime logic for vfio_device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,
On 3/13/21 1:55 AM, Jason Gunthorpe wrote:
> The vfio_device is using a 'sleep until all refs go to zero' pattern for
> its lifetime, but it is indirectly coded by repeatedly scanning the group
> list waiting for the device to be removed on its own.
> 
> Switch this around to be a direct representation, use a refcount to count
> the number of places that are blocking destruction and sleep directly on a
> completion until that counter goes to zero. kfree the device after other
> accesses have been excluded in vfio_del_group_dev(). This is a fairly
> common Linux idiom.
> 
> Due to this we can now remove kref_put_mutex(), which is very rarely used
> in the kernel. Here it is being used to prevent a zero ref device from
> being seen in the group list. Instead allow the zero ref device to
> continue to exist in the device_list and use refcount_inc_not_zero() to
> exclude it once refs go to zero.
> 
> This patch is organized so the next patch will be able to alter the API to
> allow drivers to provide the kfree.
> 
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Reviewed-by: Eric Auger <eric.auger@xxxxxxxxxx>

Thanks

Eric

> ---
>  drivers/vfio/vfio.c | 79 ++++++++++++++-------------------------------
>  1 file changed, 25 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 15d8e678e5563a..32660e8a69ae20 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -46,7 +46,6 @@ static struct vfio {
>  	struct mutex			group_lock;
>  	struct cdev			group_cdev;
>  	dev_t				group_devt;
> -	wait_queue_head_t		release_q;
>  } vfio;
>  
>  struct vfio_iommu_driver {
> @@ -91,7 +90,8 @@ struct vfio_group {
>  };
>  
>  struct vfio_device {
> -	struct kref			kref;
> +	refcount_t			refcount;
> +	struct completion		comp;
>  	struct device			*dev;
>  	const struct vfio_device_ops	*ops;
>  	struct vfio_group		*group;
> @@ -544,7 +544,8 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
>  	if (!device)
>  		return ERR_PTR(-ENOMEM);
>  
> -	kref_init(&device->kref);
> +	refcount_set(&device->refcount, 1);
> +	init_completion(&device->comp);
>  	device->dev = dev;
>  	/* Our reference on group is moved to the device */
>  	device->group = group;
> @@ -560,35 +561,17 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
>  	return device;
>  }
>  
> -static void vfio_device_release(struct kref *kref)
> -{
> -	struct vfio_device *device = container_of(kref,
> -						  struct vfio_device, kref);
> -	struct vfio_group *group = device->group;
> -
> -	list_del(&device->group_next);
> -	group->dev_counter--;
> -	mutex_unlock(&group->device_lock);
> -
> -	dev_set_drvdata(device->dev, NULL);
> -
> -	kfree(device);
> -
> -	/* vfio_del_group_dev may be waiting for this device */
> -	wake_up(&vfio.release_q);
> -}
> -
>  /* Device reference always implies a group reference */
>  void vfio_device_put(struct vfio_device *device)
>  {
> -	struct vfio_group *group = device->group;
> -	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
> +	if (refcount_dec_and_test(&device->refcount))
> +		complete(&device->comp);
>  }
>  EXPORT_SYMBOL_GPL(vfio_device_put);
>  
> -static void vfio_device_get(struct vfio_device *device)
> +static bool vfio_device_try_get(struct vfio_device *device)
>  {
> -	kref_get(&device->kref);
> +	return refcount_inc_not_zero(&device->refcount);
>  }
>  
>  static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
> @@ -598,8 +581,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
>  
>  	mutex_lock(&group->device_lock);
>  	list_for_each_entry(device, &group->device_list, group_next) {
> -		if (device->dev == dev) {
> -			vfio_device_get(device);
> +		if (device->dev == dev && vfio_device_try_get(device)) {
>  			mutex_unlock(&group->device_lock);
>  			return device;
>  		}
> @@ -883,9 +865,8 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
>  			ret = !strcmp(dev_name(it->dev), buf);
>  		}
>  
> -		if (ret) {
> +		if (ret && vfio_device_try_get(it)) {
>  			device = it;
> -			vfio_device_get(device);
>  			break;
>  		}
>  	}
> @@ -908,13 +889,13 @@ EXPORT_SYMBOL_GPL(vfio_device_data);
>   * removed.  Open file descriptors for the device... */
>  void *vfio_del_group_dev(struct device *dev)
>  {
> -	DEFINE_WAIT_FUNC(wait, woken_wake_function);
>  	struct vfio_device *device = dev_get_drvdata(dev);
>  	struct vfio_group *group = device->group;
>  	void *device_data = device->device_data;
>  	struct vfio_unbound_dev *unbound;
>  	unsigned int i = 0;
>  	bool interrupted = false;
> +	long rc;
>  
>  	/*
>  	 * When the device is removed from the group, the group suddenly
> @@ -935,32 +916,18 @@ void *vfio_del_group_dev(struct device *dev)
>  	WARN_ON(!unbound);
>  
>  	vfio_device_put(device);
> -
> -	/*
> -	 * If the device is still present in the group after the above
> -	 * 'put', then it is in use and we need to request it from the
> -	 * bus driver.  The driver may in turn need to request the
> -	 * device from the user.  We send the request on an arbitrary
> -	 * interval with counter to allow the driver to take escalating
> -	 * measures to release the device if it has the ability to do so.
> -	 */
> -	add_wait_queue(&vfio.release_q, &wait);
> -
> -	do {
> -		device = vfio_group_get_device(group, dev);
> -		if (!device)
> -			break;
> -
> +	rc = try_wait_for_completion(&device->comp);
> +	while (rc <= 0) {
>  		if (device->ops->request)
>  			device->ops->request(device_data, i++);
>  
> -		vfio_device_put(device);
> -
>  		if (interrupted) {
> -			wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
> +			rc = wait_for_completion_timeout(&device->comp,
> +							 HZ * 10);
>  		} else {
> -			wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
> -			if (signal_pending(current)) {
> +			rc = wait_for_completion_interruptible_timeout(
> +				&device->comp, HZ * 10);
> +			if (rc < 0) {
>  				interrupted = true;
>  				dev_warn(dev,
>  					 "Device is currently in use, task"
> @@ -969,10 +936,13 @@ void *vfio_del_group_dev(struct device *dev)
>  					 current->comm, task_pid_nr(current));
>  			}
>  		}
> +	}
>  
> -	} while (1);
> +	mutex_lock(&group->device_lock);
> +	list_del(&device->group_next);
> +	group->dev_counter--;
> +	mutex_unlock(&group->device_lock);
>  
> -	remove_wait_queue(&vfio.release_q, &wait);
>  	/*
>  	 * In order to support multiple devices per group, devices can be
>  	 * plucked from the group while other devices in the group are still
> @@ -992,6 +962,8 @@ void *vfio_del_group_dev(struct device *dev)
>  
>  	/* Matches the get in vfio_group_create_device() */
>  	vfio_group_put(group);
> +	dev_set_drvdata(dev, NULL);
> +	kfree(device);
>  
>  	return device_data;
>  }
> @@ -2362,7 +2334,6 @@ static int __init vfio_init(void)
>  	mutex_init(&vfio.iommu_drivers_lock);
>  	INIT_LIST_HEAD(&vfio.group_list);
>  	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
> -	init_waitqueue_head(&vfio.release_q);
>  
>  	ret = misc_register(&vfio_dev);
>  	if (ret) {
> 




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux