The VFIO MDEV driver registers with the MDEV core driver. The MDEV core driver creates a mediated device and calls the probe routine of the VFIO MDEV driver. This driver adds the mediated device to the VFIO core module. The main aim of this module is to manage all VFIO APIs for each mediated device. Those are: - get VFIO device information about the type of device, the maximum number of regions and the maximum number of interrupts supported. - get region information from the vendor driver. - get interrupt information and send interrupt configuration information to the vendor driver. - device reset. - trap and forward read/write for emulated regions. Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx> Change-Id: I583f4734752971d3d112324d69e2508c88f359ec Reviewed-on: http://git-master/r/1175706 Reviewed-by: Automatic_Commit_Validation_User --- drivers/vfio/mdev/Kconfig | 6 + drivers/vfio/mdev/Makefile | 1 + drivers/vfio/mdev/vfio_mdev.c | 467 ++++++++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_private.h | 6 +- 4 files changed, 477 insertions(+), 3 deletions(-) create mode 100644 drivers/vfio/mdev/vfio_mdev.c diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig index a34fbc66f92f..703abd0a9bff 100644 --- a/drivers/vfio/mdev/Kconfig +++ b/drivers/vfio/mdev/Kconfig @@ -9,4 +9,10 @@ config VFIO_MDEV If you don't know what do here, say N. +config VFIO_MDEV_DEVICE + tristate "VFIO support for Mediated devices" + depends on VFIO && VFIO_MDEV + default n + help + VFIO based driver for mediated devices. 
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
index 56a75e689582..e5087ed83a34 100644
--- a/drivers/vfio/mdev/Makefile
+++ b/drivers/vfio/mdev/Makefile
@@ -2,4 +2,5 @@
 mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
 
 obj-$(CONFIG_VFIO_MDEV) += mdev.o
+obj-$(CONFIG_VFIO_MDEV_DEVICE) += vfio_mdev.o
 
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
new file mode 100644
index 000000000000..28f13aeaa46b
--- /dev/null
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -0,0 +1,467 @@
+/*
+ * VFIO based Mediated PCI device driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@xxxxxxxxxx>
+ *	       Kirti Wankhede <kwankhede@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "NVIDIA Corporation"
+#define DRIVER_DESC     "VFIO based Mediated PCI device driver"
+
+/*
+ * Per-device state registered with the VFIO core as device_data.
+ * @group:    IOMMU group copied from the mediated device at probe time
+ * @mdev:     the mediated device behind this VFIO device
+ * @dev_info: reply of the last VFIO_DEVICE_GET_INFO; its num_irqs bounds
+ *            the index accepted by the two IRQ ioctls
+ */
+struct vfio_mdev {
+	struct iommu_group *group;
+	struct mdev_device *mdev;
+	struct vfio_device_info dev_info;
+};
+
+/* Pin this module while the VFIO device is held open. */
+static int vfio_mdev_open(void *device_data)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	return 0;
+}
+
+/* Drop the module reference taken by vfio_mdev_open(). */
+static void vfio_mdev_close(void *device_data)
+{
+	module_put(THIS_MODULE);
+}
+
+/*
+ * Append a copy of the sparse-mmap description @cap_type (a struct
+ * vfio_region_info_cap_sparse_mmap) to the capability chain @caps.
+ * Returns 0, or the error reported by vfio_info_cap_add().
+ */
+static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
+	size_t size;
+
+	size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas);
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	sparse_cap = container_of(header,
+			struct vfio_region_info_cap_sparse_mmap, header);
+	sparse_cap->nr_areas = sparse->nr_areas;
+	memcpy(sparse_cap->areas, sparse->areas,
+	       sparse->nr_areas * sizeof(*sparse->areas));
+	return 0;
+}
+
+/*
+ * Append a copy of the region type description @cap_type (a struct
+ * vfio_region_info_cap_type) to the capability chain @caps.
+ * Returns 0, or the error reported by vfio_info_cap_add().
+ */
+static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+
+	header = vfio_info_cap_add(caps, sizeof(*cap),
+				   VFIO_REGION_INFO_CAP_TYPE, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	type_cap = container_of(header, struct vfio_region_info_cap_type,
+				header);
+	type_cap->type = cap->type;
+	type_cap->subtype = cap->subtype;
+	return 0;
+}
+
+/*
+ * VFIO device ioctl handler: services the VFIO_DEVICE_* requests by calling
+ * the parent (vendor) driver's ops.
+ *
+ * Returns -EFAULT when user memory cannot be accessed, -EINVAL for a bad
+ * argument or a callback the parent does not implement, -ENOTTY for an
+ * unknown @cmd, otherwise 0 or whatever the parent callback returned.
+ */
+static long vfio_mdev_unlocked_ioctl(void *device_data,
+				     unsigned int cmd, unsigned long arg)
+{
+	int ret = 0;
+	struct vfio_mdev *vmdev = device_data;
+	struct parent_device *parent = vmdev->mdev->parent;
+	unsigned long minsz;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (!parent->ops->get_device_info)
+			return -EINVAL;
+
+		ret = parent->ops->get_device_info(vmdev->mdev, &info);
+		if (ret)
+			return ret;
+
+		if (parent->ops->reset)
+			info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+		/* Cached: the IRQ ioctls range-check against num_irqs. */
+		memcpy(&vmdev->dev_info, &info, sizeof(info));
+
+		/* copy_to_user() returns the bytes left, not an errno. */
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (!parent->ops->get_region_info)
+			return -EINVAL;
+
+		/* NOTE(review): ownership of *cap_type is not visible here. */
+		ret = parent->ops->get_region_info(vmdev->mdev, &info,
+						   &cap_type_id, &cap_type);
+		if (ret)
+			return ret;
+
+		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && cap_type) {
+			switch (cap_type_id) {
+			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
+				ret = sparse_mmap_cap(&caps, cap_type);
+				break;
+			case VFIO_REGION_INFO_CAP_TYPE:
+				ret = region_type_cap(&caps, cap_type);
+				break;
+			default:
+				ret = -EINVAL;
+				break;
+			}
+			if (ret)
+				return ret;
+		}
+
+		if (caps.size) {
+			if (info.argsz < sizeof(info) + caps.size) {
+				/* Too small: report the size needed. */
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						 sizeof(info), caps.buf,
+						 caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+			kfree(caps.buf);
+		}
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if ((info.argsz < minsz) ||
+		    (info.index >= vmdev->dev_info.num_irqs))
+			return -EINVAL;
+
+		if (!parent->ops->get_irq_info)
+			return -EINVAL;
+
+		ret = parent->ops->get_irq_info(vmdev->mdev, &info);
+		if (ret)
+			return ret;
+
+		if (info.count == -1)
+			return -EINVAL;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		u8 *data = NULL;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if ((hdr.argsz < minsz) ||
+		    (hdr.index >= vmdev->dev_info.num_irqs) ||
+		    (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+				   VFIO_IRQ_SET_ACTION_TYPE_MASK)))
+			return -EINVAL;
+
+		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
+			size_t size;
+
+			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
+				size = sizeof(uint8_t);
+			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
+				size = sizeof(int32_t);
+			else
+				return -EINVAL;
+
+			/* Divide: count * size can wrap a 32-bit size_t. */
+			if (hdr.count > (hdr.argsz - minsz) / size)
+				return -EINVAL;
+
+			data = memdup_user((void __user *)(arg + minsz),
+					   hdr.count * size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}
+
+		if (parent->ops->set_irqs)
+			ret = parent->ops->set_irqs(vmdev->mdev, hdr.flags,
+						    hdr.index, hdr.start,
+						    hdr.count, data);
+		else
+			ret = -EINVAL;
+
+		kfree(data);
+		return ret;
+	}
+	case VFIO_DEVICE_RESET:
+	{
+		if (parent->ops->reset)
+			return parent->ops->reset(vmdev->mdev);
+
+		return -EINVAL;
+	}
+	}
+	return -ENOTTY;
+}
+
+/*
+ * Size in bytes (4, 2 or 1) of the widest naturally aligned access that
+ * fits in @count bytes starting at file offset @pos.
+ */
+static size_t vfio_mdev_access_width(size_t count, loff_t pos)
+{
+	if (count >= 4 && !(pos % 4))
+		return 4;
+	if (count >= 2 && !(pos % 2))
+		return 2;
+	return 1;
+}
+
+/*
+ * Read @count bytes at *@ppos into @buf, one aligned 4/2/1 byte access to
+ * the parent's read op at a time; *@ppos advances past each completed chunk.
+ *
+ * Returns @count on success, -EINVAL when the parent has no read op, and
+ * -EFAULT when the parent op returns <= 0 or the copy to @buf fails.
+ */
+static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct vfio_mdev *vmdev = device_data;
+	struct mdev_device *mdev = vmdev->mdev;
+	struct parent_device *parent = mdev->parent;
+	size_t done = 0;
+
+	if (!parent->ops->read)
+		return -EINVAL;
+
+	while (count) {
+		size_t width = vfio_mdev_access_width(count, *ppos);
+		/* Zeroed so a short parent read cannot leak stack bytes. */
+		u32 val = 0;
+
+		if (parent->ops->read(mdev, (char *)&val, width, *ppos) <= 0)
+			return -EFAULT;
+
+		if (copy_to_user(buf, &val, width))
+			return -EFAULT;
+
+		count -= width;
+		done += width;
+		*ppos += width;
+		buf += width;
+	}
+
+	return done;
+}
+
+/*
+ * Write @count bytes from @buf at *@ppos, one aligned 4/2/1 byte access to
+ * the parent's write op at a time; *@ppos advances past each completed chunk.
+ *
+ * Returns @count on success, -EINVAL when the parent has no write op, and
+ * -EFAULT when the copy from @buf fails or the parent op returns <= 0.
+ */
+static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct vfio_mdev *vmdev = device_data;
+	struct mdev_device *mdev = vmdev->mdev;
+	struct parent_device *parent = mdev->parent;
+	size_t done = 0;
+
+	if (!parent->ops->write)
+		return -EINVAL;
+
+	while (count) {
+		size_t width = vfio_mdev_access_width(count, *ppos);
+		u32 val;
+
+		if (copy_from_user(&val, buf, width))
+			return -EFAULT;
+
+		if (parent->ops->write(mdev, (char *)&val, width, *ppos) <= 0)
+			return -EFAULT;
+
+		count -= width;
+		done += width;
+		*ppos += width;
+		buf += width;
+	}
+
+	return done;
+}
+
+/* Forward mmap() to the parent driver; -EINVAL when it has no mmap op. */
+static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
+{
+	struct vfio_mdev *vmdev = device_data;
+	struct mdev_device *mdev = vmdev->mdev;
+	struct parent_device *parent = mdev->parent;
+
+	if (parent->ops->mmap)
+		return parent->ops->mmap(mdev, vma);
+
+	return -EINVAL;
+}
+
+static const struct vfio_device_ops vfio_mdev_dev_ops = {
+	.name		= "vfio-mdev",
+	.open		= vfio_mdev_open,
+	.release	= vfio_mdev_close,
+	.ioctl		= vfio_mdev_unlocked_ioctl,
+	.read		= vfio_mdev_read,
+	.write		= vfio_mdev_write,
+	.mmap		= vfio_mdev_mmap,
+};
+
+/*
+ * Bind to a mediated device: allocate its struct vfio_mdev and register it
+ * with the VFIO core. Returns 0, -ENOMEM, or vfio_add_group_dev()'s error.
+ */
+int vfio_mdev_probe(struct device *dev)
+{
+	struct vfio_mdev *vmdev;
+	struct mdev_device *mdev = to_mdev_device(dev);
+	int ret;
+
+	/* kzalloc() signals failure with NULL, never with an ERR_PTR(). */
+	vmdev = kzalloc(sizeof(*vmdev), GFP_KERNEL);
+	if (!vmdev)
+		return -ENOMEM;
+
+	vmdev->mdev = mdev_get_device(mdev);
+	vmdev->group = mdev->group;
+
+	ret = vfio_add_group_dev(dev, &vfio_mdev_dev_ops, vmdev);
+	if (ret)
+		kfree(vmdev);
+
+	/*
+	 * NOTE(review): the reference taken above is dropped on success too,
+	 * so vmdev->mdev is not pinned by this driver - confirm the binding
+	 * keeps the device alive until vfio_mdev_remove().
+	 */
+	mdev_put_device(mdev);
+	return ret;
+}
+
+/* Unbind: unregister from the VFIO core and free the per-device state. */
+void vfio_mdev_remove(struct device *dev)
+{
+	struct vfio_mdev *vmdev;
+
+	vmdev = vfio_del_group_dev(dev);
+	kfree(vmdev);
+}
+
+struct mdev_driver vfio_mdev_driver = {
+	.name	= "vfio_mdev",
+	.probe	= vfio_mdev_probe,
+	.remove	= vfio_mdev_remove,
+};
+
+static int __init vfio_mdev_init(void)
+{
+	return mdev_register_driver(&vfio_mdev_driver, THIS_MODULE);
+}
+
+static void __exit vfio_mdev_exit(void)
+{
+	mdev_unregister_driver(&vfio_mdev_driver);
+}
+
+module_init(vfio_mdev_init)
+module_exit(vfio_mdev_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 016c14a1b454..776cc2b063d4 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -21,9 +21,9 @@
 
 #define VFIO_PCI_OFFSET_SHIFT 40
 
-#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
-#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
-#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
 /* Special capability IDs predefined access */
 #define PCI_CAP_ID_INVALID 0xFF /* default raw access */
-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html