Re: [PATCH v4 11/12] samples: add vfio-mdev-pci driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue,  7 Jan 2020 20:01:48 +0800
Liu Yi L <yi.l.liu@xxxxxxxxx> wrote:

> This patch adds sample driver named vfio-mdev-pci. It is to wrap
> a PCI device as a mediated device. For a pci device, once bound
> to vfio-mdev-pci driver, user space access of this device will
> go through vfio mdev framework. The usage of the device follows
> mdev management method. e.g. user should create a mdev before
> exposing the device to user-space.
> 
> Benefit of this new driver would be acting as a sample driver
> for recent changes from "vfio/mdev: IOMMU aware mediated device"
> patchset. Also it could be a good experiment driver for future
> device specific mdev migration support. This sample driver only
> supports singleton iommu groups, for non-singleton iommu groups,
> this sample driver doesn't work. It will fail when trying to assign
> the non-singleton iommu group to VMs.
> 
> To use this driver:
> a) build and load vfio-mdev-pci.ko module
>    execute "make menuconfig" and config CONFIG_SAMPLE_VFIO_MDEV_PCI
>    then load it with following command:
>    > sudo modprobe vfio
>    > sudo modprobe vfio-pci
>    > sudo insmod samples/vfio-mdev-pci/vfio-mdev-pci.ko  
> 
> b) unbind original device driver
>    e.g. use following command to unbind its original driver
>    > echo $dev_bdf > /sys/bus/pci/devices/$dev_bdf/driver/unbind  
> 
> c) bind vfio-mdev-pci driver to the physical device
>    > echo $vend_id $dev_id > /sys/bus/pci/drivers/vfio-mdev-pci/new_id  
> 
> d) check the supported mdev instances
>    > ls /sys/bus/pci/devices/$dev_bdf/mdev_supported_types/  
>      vfio-mdev-pci-type_name
>    > ls /sys/bus/pci/devices/$dev_bdf/mdev_supported_types/\  
>      vfio-mdev-pci-type_name/
>      available_instances  create  device_api  devices  name
> 
> e)  create mdev on this physical device (only 1 instance)
>    > echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1003" > \  
>      /sys/bus/pci/devices/$dev_bdf/mdev_supported_types/\
>      vfio-mdev-pci-type_name/create
> 
> f) passthru the mdev to guest
>    add the following line in QEMU boot command
>     -device vfio-pci,\
>      sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1003
> 
> g) destroy mdev
>    > echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1003/\  
>      remove
> 
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
> Cc: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> Cc: Masahiro Yamada <yamada.masahiro@xxxxxxxxxxxxx>
> Suggested-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx>
> ---
>  samples/Kconfig                       |  10 +
>  samples/Makefile                      |   1 +
>  samples/vfio-mdev-pci/Makefile        |   4 +
>  samples/vfio-mdev-pci/vfio_mdev_pci.c | 397 ++++++++++++++++++++++++++++++++++
>  4 files changed, 412 insertions(+)
>  create mode 100644 samples/vfio-mdev-pci/Makefile
>  create mode 100644 samples/vfio-mdev-pci/vfio_mdev_pci.c
> 
> diff --git a/samples/Kconfig b/samples/Kconfig
> index 9d236c3..50d207c 100644
> --- a/samples/Kconfig
> +++ b/samples/Kconfig
> @@ -190,5 +190,15 @@ config SAMPLE_INTEL_MEI
>  	help
>  	  Build a sample program to work with mei device.
>  
> +config SAMPLE_VFIO_MDEV_PCI
> +	tristate "Sample driver for wrapping PCI device as a mdev"
> +	select VFIO_PCI_COMMON
> +	select VFIO_PCI
> +	depends on VFIO_MDEV && VFIO_MDEV_DEVICE
> +	help
> +	  Sample driver for wrapping a PCI device as a mdev. Once bound to
> +	  this driver, device passthru should through mdev path.
> +
> +	  If you don't know what to do here, say N.
>  
>  endif # SAMPLES
> diff --git a/samples/Makefile b/samples/Makefile
> index 5ce50ef..84faced 100644
> --- a/samples/Makefile
> +++ b/samples/Makefile
> @@ -21,5 +21,6 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT)	+= ftrace/
>  obj-$(CONFIG_SAMPLE_TRACE_ARRAY)	+= ftrace/
>  obj-$(CONFIG_VIDEO_PCI_SKELETON)	+= v4l/
>  obj-y					+= vfio-mdev/
> +obj-y					+= vfio-mdev-pci/

I think we could just lump this into vfio-mdev rather than making
another directory.

>  subdir-$(CONFIG_SAMPLE_VFS)		+= vfs
>  obj-$(CONFIG_SAMPLE_INTEL_MEI)		+= mei/
> diff --git a/samples/vfio-mdev-pci/Makefile b/samples/vfio-mdev-pci/Makefile
> new file mode 100644
> index 0000000..41b2139
> --- /dev/null
> +++ b/samples/vfio-mdev-pci/Makefile
> @@ -0,0 +1,4 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +vfio-mdev-pci-y := vfio_mdev_pci.o
> +
> +obj-$(CONFIG_SAMPLE_VFIO_MDEV_PCI) += vfio-mdev-pci.o
> diff --git a/samples/vfio-mdev-pci/vfio_mdev_pci.c b/samples/vfio-mdev-pci/vfio_mdev_pci.c
> new file mode 100644
> index 0000000..b180356
> --- /dev/null
> +++ b/samples/vfio-mdev-pci/vfio_mdev_pci.c
> @@ -0,0 +1,397 @@
> +/*
> + * Copyright © 2020 Intel Corporation.
> + *     Author: Liu Yi L <yi.l.liu@xxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * Derived from original vfio_pci.c:
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + *     Author: Alex Williamson <alex.williamson@xxxxxxxxxx>
> + *
> + * Derived from original vfio:
> + * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
> + * Author: Tom Lyon, pugs@xxxxxxxxx
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/device.h>
> +#include <linux/eventfd.h>
> +#include <linux/file.h>
> +#include <linux/interrupt.h>
> +#include <linux/iommu.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/notifier.h>
> +#include <linux/pci.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/uaccess.h>
> +#include <linux/vfio.h>
> +#include <linux/vgaarb.h>
> +#include <linux/nospec.h>
> +#include <linux/mdev.h>
> +#include <linux/vfio_pci_common.h>
> +
> +#define DRIVER_VERSION  "0.1"
> +#define DRIVER_AUTHOR   "Liu Yi L <yi.l.liu@xxxxxxxxx>"
> +#define DRIVER_DESC     "VFIO Mdev PCI - Sample driver for PCI device as a mdev"
> +
> +#define VFIO_MDEV_PCI_NAME  "vfio-mdev-pci"
> +
> +static char ids[1024] __initdata;
> +module_param_string(ids, ids, sizeof(ids), 0);
> +MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio-mdev-pci driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");
> +
> +static bool nointxmask;
> +module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(nointxmask,
> +		  "Disable support for PCI 2.3 style INTx masking.  If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@xxxxxxxxxxxxxxx so the device can be fixed automatically via the broken_intx_masking flag.");
> +
> +#ifdef CONFIG_VFIO_PCI_VGA
> +static bool disable_vga;
> +module_param(disable_vga, bool, S_IRUGO);
> +MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-mdev-pci");
> +#endif
> +
> +static bool disable_idle_d3;
> +module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(disable_idle_d3,
> +		 "Disable using the PCI D3 low power state for idle, unused devices");
> +
> +static struct pci_driver vfio_mdev_pci_driver;
> +
> +static ssize_t
> +name_show(struct kobject *kobj, struct device *dev, char *buf)
> +{
> +	return sprintf(buf, "%s-type1\n", dev_name(dev));
> +}
> +
> +MDEV_TYPE_ATTR_RO(name);
> +
> +static ssize_t
> +available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
> +{
> +	return sprintf(buf, "%d\n", 1);
> +}
> +
> +MDEV_TYPE_ATTR_RO(available_instances);
> +
> +static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
> +		char *buf)
> +{
> +	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
> +}
> +
> +MDEV_TYPE_ATTR_RO(device_api);
> +
> +static struct attribute *vfio_mdev_pci_types_attrs[] = {
> +	&mdev_type_attr_name.attr,
> +	&mdev_type_attr_device_api.attr,
> +	&mdev_type_attr_available_instances.attr,
> +	NULL,
> +};
> +
> +static struct attribute_group vfio_mdev_pci_type_group1 = {
> +	.name  = "type1",
> +	.attrs = vfio_mdev_pci_types_attrs,
> +};
> +
> +struct attribute_group *vfio_mdev_pci_type_groups[] = {
> +	&vfio_mdev_pci_type_group1,
> +	NULL,
> +};
> +
> +struct vfio_mdev_pci {
> +	struct vfio_pci_device *vdev;
> +	struct mdev_device *mdev;
> +	unsigned long handle;
> +};
> +
> +static int vfio_mdev_pci_create(struct kobject *kobj, struct mdev_device *mdev)
> +{
> +	struct device *pdev;
> +	struct vfio_pci_device *vdev;
> +	struct vfio_mdev_pci *pmdev;
> +	int ret;
> +
> +	pdev = mdev_parent_dev(mdev);
> +	vdev = dev_get_drvdata(pdev);
> +	pmdev = kzalloc(sizeof(struct vfio_mdev_pci), GFP_KERNEL);
> +	if (pmdev == NULL) {
> +		ret = -EBUSY;
> +		goto out;
> +	}
> +
> +	pmdev->mdev = mdev;
> +	pmdev->vdev = vdev;
> +	mdev_set_drvdata(mdev, pmdev);
> +	ret = mdev_set_iommu_device(mdev_dev(mdev), pdev);
> +	if (ret) {
> +		pr_info("%s, failed to config iommu isolation for mdev: %s on pf: %s\n",
> +			__func__, dev_name(mdev_dev(mdev)), dev_name(pdev));
> +		goto out;
> +	}
> +
> +	pr_info("%s, creation succeeded for mdev: %s\n", __func__,
> +		     dev_name(mdev_dev(mdev)));
> +out:
> +	return ret;
> +}
> +
> +static int vfio_mdev_pci_remove(struct mdev_device *mdev)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +
> +	kfree(pmdev);
> +	pr_info("%s, succeeded for mdev: %s\n", __func__,
> +		     dev_name(mdev_dev(mdev)));
> +
> +	return 0;
> +}
> +
> +static int vfio_mdev_pci_open(struct mdev_device *mdev)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +	struct vfio_pci_device *vdev = pmdev->vdev;
> +	int ret = 0;
> +
> +	if (!try_module_get(THIS_MODULE))
> +		return -ENODEV;
> +
> +	vfio_pci_refresh_config(vdev, nointxmask, disable_idle_d3);
> +
> +	mutex_lock(&vdev->reflck->lock);
> +
> +	if (!vdev->refcnt) {
> +		ret = vfio_pci_enable(vdev);
> +		if (ret)
> +			goto error;
> +
> +		vfio_spapr_pci_eeh_open(vdev->pdev);
> +	}
> +	vdev->refcnt++;
> +error:
> +	mutex_unlock(&vdev->reflck->lock);
> +	if (!ret)
> +		pr_info("Succeeded to open mdev: %s on pf: %s\n",
> +		dev_name(mdev_dev(mdev)), dev_name(&pmdev->vdev->pdev->dev));
> +	else {
> +		pr_info("Failed to open mdev: %s on pf: %s\n",
> +		dev_name(mdev_dev(mdev)), dev_name(&pmdev->vdev->pdev->dev));
> +		module_put(THIS_MODULE);
> +	}
> +	return ret;
> +}
> +
> +static void vfio_mdev_pci_release(struct mdev_device *mdev)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +	struct vfio_pci_device *vdev = pmdev->vdev;
> +
> +	pr_info("Release mdev: %s on pf: %s\n",
> +		dev_name(mdev_dev(mdev)), dev_name(&pmdev->vdev->pdev->dev));
> +
> +	mutex_lock(&vdev->reflck->lock);
> +
> +	if (!(--vdev->refcnt)) {
> +		vfio_spapr_pci_eeh_release(vdev->pdev);
> +		vfio_pci_disable(vdev);
> +	}
> +
> +	mutex_unlock(&vdev->reflck->lock);
> +
> +	module_put(THIS_MODULE);
> +}

open() and release() here are almost identical between vfio_pci and
vfio_mdev_pci, which suggests maybe there should be common functions to
call into like we do for the below.

> +static long vfio_mdev_pci_ioctl(struct mdev_device *mdev, unsigned int cmd,
> +			     unsigned long arg)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +
> +	return vfio_pci_ioctl(pmdev->vdev, cmd, arg);
> +}
> +
> +static int vfio_mdev_pci_mmap(struct mdev_device *mdev,
> +				struct vm_area_struct *vma)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +
> +	return vfio_pci_mmap(pmdev->vdev, vma);
> +}
> +
> +static ssize_t vfio_mdev_pci_read(struct mdev_device *mdev, char __user *buf,
> +			size_t count, loff_t *ppos)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +
> +	return vfio_pci_read(pmdev->vdev, buf, count, ppos);
> +}
> +
> +static ssize_t vfio_mdev_pci_write(struct mdev_device *mdev,
> +				const char __user *buf,
> +				size_t count, loff_t *ppos)
> +{
> +	struct vfio_mdev_pci *pmdev = mdev_get_drvdata(mdev);
> +
> +	return vfio_pci_write(pmdev->vdev, (char __user *)buf, count, ppos);
> +}
> +
> +static const struct mdev_parent_ops vfio_mdev_pci_ops = {
> +	.supported_type_groups	= vfio_mdev_pci_type_groups,
> +	.create			= vfio_mdev_pci_create,
> +	.remove			= vfio_mdev_pci_remove,
> +
> +	.open			= vfio_mdev_pci_open,
> +	.release		= vfio_mdev_pci_release,
> +
> +	.read			= vfio_mdev_pci_read,
> +	.write			= vfio_mdev_pci_write,
> +	.mmap			= vfio_mdev_pci_mmap,
> +	.ioctl			= vfio_mdev_pci_ioctl,
> +};
> +
> +static int vfio_mdev_pci_driver_probe(struct pci_dev *pdev,
> +				       const struct pci_device_id *id)
> +{
> +	struct vfio_pci_device *vdev;
> +	int ret;
> +
> +	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
> +		return -EINVAL;
> +
> +	/*
> +	 * Prevent binding to PFs with VFs enabled, this too easily allows
> +	 * userspace instance with VFs and PFs from the same device, which
> +	 * cannot work.  Disabling SR-IOV here would initiate removing the
> +	 * VFs, which would unbind the driver, which is prone to blocking
> +	 * if that VF is also in use by vfio-pci or vfio-mdev-pci. Just
> +	 * reject these PFs and let the user sort it out.
> +	 */
> +	if (pci_num_vf(pdev)) {
> +		pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
> +		return -EBUSY;
> +	}
> +
> +	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
> +	if (!vdev)
> +		return -ENOMEM;
> +
> +	vdev->pdev = pdev;
> +	vdev->irq_type = VFIO_PCI_NUM_IRQS;
> +	mutex_init(&vdev->igate);
> +	spin_lock_init(&vdev->irqlock);
> +	mutex_init(&vdev->ioeventfds_lock);
> +	INIT_LIST_HEAD(&vdev->ioeventfds_list);
> +	vdev->nointxmask = nointxmask;
> +#ifdef CONFIG_VFIO_PCI_VGA
> +	vdev->disable_vga = disable_vga;
> +#endif
> +	vdev->disable_idle_d3 = disable_idle_d3;
> +
> +	pci_set_drvdata(pdev, vdev);
> +
> +	ret = vfio_pci_reflck_attach(vdev);
> +	if (ret) {
> +		pci_set_drvdata(pdev, NULL);
> +		kfree(vdev);
> +		return ret;
> +	}
> +
> +	if (vfio_pci_is_vga(pdev)) {
> +		vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
> +		vga_set_legacy_decoding(pdev,
> +					vfio_pci_set_vga_decode(vdev, false));
> +	}
> +
> +	vfio_pci_probe_power_state(vdev);
> +
> +	if (!vdev->disable_idle_d3) {
> +		/*
> +		 * pci-core sets the device power state to an unknown value at
> +		 * bootup and after being removed from a driver.  The only
> +		 * transition it allows from this unknown state is to D0, which
> +		 * typically happens when a driver calls pci_enable_device().
> +		 * We're not ready to enable the device yet, but we do want to
> +		 * be able to get to D3.  Therefore first do a D0 transition
> +		 * before going to D3.
> +		 */
> +		vfio_pci_set_power_state(vdev, PCI_D0);
> +		vfio_pci_set_power_state(vdev, PCI_D3hot);
> +	}

Ditto here and for the remove callback below; this seems like boilerplate
that shouldn't be duplicated per leaf module.  Thanks,

Alex


> +
> +	ret = mdev_register_device(&pdev->dev, &vfio_mdev_pci_ops);
> +	if (ret)
> +		pr_err("Cannot register mdev for device %s\n",
> +			dev_name(&pdev->dev));
> +	else
> +		pr_info("Wrap device %s as a mdev\n", dev_name(&pdev->dev));
> +
> +	return ret;
> +}
> +
> +static void vfio_mdev_pci_driver_remove(struct pci_dev *pdev)
> +{
> +	struct vfio_pci_device *vdev;
> +
> +	vdev = pci_get_drvdata(pdev);
> +	if (!vdev)
> +		return;
> +
> +	vfio_pci_reflck_put(vdev->reflck);
> +
> +	kfree(vdev->region);
> +	mutex_destroy(&vdev->ioeventfds_lock);
> +
> +	if (!disable_idle_d3)
> +		vfio_pci_set_power_state(vdev, PCI_D0);
> +
> +	kfree(vdev->pm_save);
> +
> +	if (vfio_pci_is_vga(pdev)) {
> +		vga_client_register(pdev, NULL, NULL, NULL);
> +		vga_set_legacy_decoding(pdev,
> +				VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
> +				VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
> +	}
> +
> +	kfree(vdev);
> +}
> +
> +static struct pci_driver vfio_mdev_pci_driver = {
> +	.name		= VFIO_MDEV_PCI_NAME,
> +	.id_table	= NULL, /* only dynamic ids */
> +	.probe		= vfio_mdev_pci_driver_probe,
> +	.remove		= vfio_mdev_pci_driver_remove,
> +	.err_handler	= &vfio_pci_err_handlers,
> +};
> +
> +static void __exit vfio_mdev_pci_cleanup(void)
> +{
> +	pci_unregister_driver(&vfio_mdev_pci_driver);
> +}
> +
> +static int __init vfio_mdev_pci_init(void)
> +{
> +	int ret;
> +
> +	/* Register and scan for devices */
> +	ret = pci_register_driver(&vfio_mdev_pci_driver);
> +	if (ret)
> +		return ret;
> +
> +	vfio_pci_fill_ids(ids, &vfio_mdev_pci_driver);
> +
> +	return 0;
> +}
> +
> +module_init(vfio_mdev_pci_init);
> +module_exit(vfio_mdev_pci_cleanup);
> +
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_DESCRIPTION(DRIVER_DESC);





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux