Re: [PATCH V2 vfio 06/11] vfio: Introduce the DMA logging feature support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 14 Jul 2022 11:12:46 +0300
Yishai Hadas <yishaih@xxxxxxxxxx> wrote:

> Introduce the DMA logging feature support in the vfio core layer.
> 
> It includes the processing of the device start/stop/report DMA logging
> UAPIs and calling the relevant driver 'op' to do the work.
> 
> Specifically,
> Upon start, the core translates the given input ranges into an interval
> tree, checks for unexpected overlapping, non aligned ranges and then
> pass the translated input to the driver for start tracking the given
> ranges.
> 
> Upon report, the core translates the given input user space bitmap and
> page size into an IOVA kernel bitmap iterator. Then it iterates it and
> call the driver to set the corresponding bits for the dirtied pages in a
> specific IOVA range.
> 
> Upon stop, the driver is called to stop the previous started tracking.
> 
> The next patches from the series will introduce the mlx5 driver
> implementation for the logging ops.
> 
> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx>
> ---
>  drivers/vfio/Kconfig             |   1 +
>  drivers/vfio/pci/vfio_pci_core.c |   5 +
>  drivers/vfio/vfio_main.c         | 161 +++++++++++++++++++++++++++++++
>  include/linux/vfio.h             |  21 +++-
>  4 files changed, 186 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
> index 6130d00252ed..86c381ceb9a1 100644
> --- a/drivers/vfio/Kconfig
> +++ b/drivers/vfio/Kconfig
> @@ -3,6 +3,7 @@ menuconfig VFIO
>  	tristate "VFIO Non-Privileged userspace driver framework"
>  	select IOMMU_API
>  	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
> +	select INTERVAL_TREE
>  	help
>  	  VFIO provides a framework for secure userspace device drivers.
>  	  See Documentation/driver-api/vfio.rst for more details.
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 2efa06b1fafa..b6dabf398251 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1862,6 +1862,11 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
>  			return -EINVAL;
>  	}
>  
> +	if (vdev->vdev.log_ops && !(vdev->vdev.log_ops->log_start &&
> +	    vdev->vdev.log_ops->log_stop &&
> +	    vdev->vdev.log_ops->log_read_and_clear))
> +		return -EINVAL;
> +
>  	/*
>  	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
>  	 * by the host or other users.  We cannot capture the VFs if they
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index bd84ca7c5e35..2414d827e3c8 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -32,6 +32,8 @@
>  #include <linux/vfio.h>
>  #include <linux/wait.h>
>  #include <linux/sched/signal.h>
> +#include <linux/interval_tree.h>
> +#include <linux/iova_bitmap.h>
>  #include "vfio.h"
>  
>  #define DRIVER_VERSION	"0.3"
> @@ -1603,6 +1605,153 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
>  	return 0;
>  }
>  
> +#define LOG_MAX_RANGES 1024
> +
> +static int
> +vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
> +					u32 flags, void __user *arg,
> +					size_t argsz)
> +{
> +	size_t minsz =
> +		offsetofend(struct vfio_device_feature_dma_logging_control,
> +			    ranges);
> +	struct vfio_device_feature_dma_logging_range __user *ranges;
> +	struct vfio_device_feature_dma_logging_control control;
> +	struct vfio_device_feature_dma_logging_range range;
> +	struct rb_root_cached root = RB_ROOT_CACHED;
> +	struct interval_tree_node *nodes;
> +	u32 nnodes;
> +	int i, ret;
> +
> +	if (!device->log_ops)
> +		return -ENOTTY;
> +
> +	ret = vfio_check_feature(flags, argsz,
> +				 VFIO_DEVICE_FEATURE_SET,
> +				 sizeof(control));
> +	if (ret != 1)
> +		return ret;
> +
> +	if (copy_from_user(&control, arg, minsz))
> +		return -EFAULT;
> +
> +	nnodes = control.num_ranges;
> +	if (!nnodes || nnodes > LOG_MAX_RANGES)
> +		return -EINVAL;

The latter looks more like an -E2BIG errno.  This is a hard coded
limit, but what are the heuristics?  Can a user introspect the limit?
Thanks,

Alex

> +
> +	ranges = u64_to_user_ptr(control.ranges);
> +	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
> +			      GFP_KERNEL);
> +	if (!nodes)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < nnodes; i++) {
> +		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
> +			ret = -EFAULT;
> +			goto end;
> +		}
> +		if (!IS_ALIGNED(range.iova, control.page_size) ||
> +		    !IS_ALIGNED(range.length, control.page_size)) {
> +			ret = -EINVAL;
> +			goto end;
> +		}
> +		nodes[i].start = range.iova;
> +		nodes[i].last = range.iova + range.length - 1;
> +		if (interval_tree_iter_first(&root, nodes[i].start,
> +					     nodes[i].last)) {
> +			/* Range overlapping */
> +			ret = -EINVAL;
> +			goto end;
> +		}
> +		interval_tree_insert(nodes + i, &root);
> +	}
> +
> +	ret = device->log_ops->log_start(device, &root, nnodes,
> +					 &control.page_size);
> +	if (ret)
> +		goto end;
> +
> +	if (copy_to_user(arg, &control, sizeof(control))) {
> +		ret = -EFAULT;
> +		device->log_ops->log_stop(device);
> +	}
> +
> +end:
> +	kfree(nodes);
> +	return ret;
> +}
> +
> +static int
> +vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
> +				       u32 flags, void __user *arg,
> +				       size_t argsz)
> +{
> +	int ret;
> +
> +	if (!device->log_ops)
> +		return -ENOTTY;
> +
> +	ret = vfio_check_feature(flags, argsz,
> +				 VFIO_DEVICE_FEATURE_SET, 0);
> +	if (ret != 1)
> +		return ret;
> +
> +	return device->log_ops->log_stop(device);
> +}
> +
> +static int
> +vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
> +					 u32 flags, void __user *arg,
> +					 size_t argsz)
> +{
> +	size_t minsz =
> +		offsetofend(struct vfio_device_feature_dma_logging_report,
> +			    bitmap);
> +	struct vfio_device_feature_dma_logging_report report;
> +	struct iova_bitmap_iter iter;
> +	int ret;
> +
> +	if (!device->log_ops)
> +		return -ENOTTY;
> +
> +	ret = vfio_check_feature(flags, argsz,
> +				 VFIO_DEVICE_FEATURE_GET,
> +				 sizeof(report));
> +	if (ret != 1)
> +		return ret;
> +
> +	if (copy_from_user(&report, arg, minsz))
> +		return -EFAULT;
> +
> +	if (report.page_size < PAGE_SIZE)
> +		return -EINVAL;
> +
> +	iova_bitmap_init(&iter.dirty, report.iova, ilog2(report.page_size));
> +	ret = iova_bitmap_iter_init(&iter, report.iova, report.length,
> +				    u64_to_user_ptr(report.bitmap));
> +	if (ret)
> +		return ret;
> +
> +	for (; !iova_bitmap_iter_done(&iter);
> +	     iova_bitmap_iter_advance(&iter)) {
> +		ret = iova_bitmap_iter_get(&iter);
> +		if (ret)
> +			break;
> +
> +		ret = device->log_ops->log_read_and_clear(device,
> +			iova_bitmap_iova(&iter),
> +			iova_bitmap_length(&iter), &iter.dirty);
> +
> +		iova_bitmap_iter_put(&iter);
> +
> +		if (ret)
> +			break;
> +	}
> +
> +	iova_bitmap_iter_free(&iter);
> +	return ret;
> +}
> +
>  static int vfio_ioctl_device_feature(struct vfio_device *device,
>  				     struct vfio_device_feature __user *arg)
>  {
> @@ -1636,6 +1785,18 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
>  		return vfio_ioctl_device_feature_mig_device_state(
>  			device, feature.flags, arg->data,
>  			feature.argsz - minsz);
> +	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
> +		return vfio_ioctl_device_feature_logging_start(
> +			device, feature.flags, arg->data,
> +			feature.argsz - minsz);
> +	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
> +		return vfio_ioctl_device_feature_logging_stop(
> +			device, feature.flags, arg->data,
> +			feature.argsz - minsz);
> +	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
> +		return vfio_ioctl_device_feature_logging_report(
> +			device, feature.flags, arg->data,
> +			feature.argsz - minsz);
>  	default:
>  		if (unlikely(!device->ops->device_feature))
>  			return -EINVAL;
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 4d26e149db81..feed84d686ec 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -14,6 +14,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/poll.h>
>  #include <uapi/linux/vfio.h>
> +#include <linux/iova_bitmap.h>
>  
>  struct kvm;
>  
> @@ -33,10 +34,11 @@ struct vfio_device {
>  	struct device *dev;
>  	const struct vfio_device_ops *ops;
>  	/*
> -	 * mig_ops is a static property of the vfio_device which must be set
> -	 * prior to registering the vfio_device.
> +	 * mig_ops/log_ops is a static property of the vfio_device which must
> +	 * be set prior to registering the vfio_device.
>  	 */
>  	const struct vfio_migration_ops *mig_ops;
> +	const struct vfio_log_ops *log_ops;
>  	struct vfio_group *group;
>  	struct vfio_device_set *dev_set;
>  	struct list_head dev_set_list;
> @@ -104,6 +106,21 @@ struct vfio_migration_ops {
>  				   enum vfio_device_mig_state *curr_state);
>  };
>  
> +/**
> + * @log_start: Optional callback to ask the device start DMA logging.
> + * @log_stop: Optional callback to ask the device stop DMA logging.
> + * @log_read_and_clear: Optional callback to ask the device read
> + *         and clear the dirty DMAs in some given range.
> + */
> +struct vfio_log_ops {
> +	int (*log_start)(struct vfio_device *device,
> +		struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
> +	int (*log_stop)(struct vfio_device *device);
> +	int (*log_read_and_clear)(struct vfio_device *device,
> +		unsigned long iova, unsigned long length,
> +		struct iova_bitmap *dirty);
> +};
> +
>  /**
>   * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
>   * @flags: Arg from the device_feature op




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux