Split the vfio_pci driver into two parts, the 'struct pci_driver' (vfio_pci) and a library of code (vfio_pci_core) that helps creating a VFIO device on top of a PCI device. As before vfio_pci.ko continues to present the same interface under sysfs and this change should have no functional impact. vfio_pci_core exposes an interface that is similar to a typical Linux subsystem, in that a pci_driver doing probe() can setup a number of details and then create the VFIO char device. Allowing another module to provide the pci_driver allows that module to customize how VFIO is setup, inject its own operations, and easily extend vendor specific functionality. This is complementary to how VFIO's mediated devices work. Instead of custome device lifecycle managmenet and a special bus drivers using this approach will rely on the normal driver core lifecycle (e.g. bind/unbind) management and this is optimized to effectively support customization that is only making small modifications to what vfio_pci would do normally. This approach is also a pluggable alternative for the hard wired CONFIG_VFIO_PCI_IGD and CONFIG_VFIO_PCI_NVLINK2 "drivers" that are built into vfio-pci. Using this work all of that code can be moved to a dedicated device-specific modules and cleanly split out of the generic vfio_pci driver. Below is an example for adding new driver to vfio pci subsystem: +-------------------------------------------------+ | | | VFIO | | | +-------------------------------------------------+ +-------------------------------------------------+ | | | VFIO_PCI_CORE | | | +-------------------------------------------------+ +--------------+ +---------------+ +--------------+ | | | | | | | VFIO_PCI | | MLX5_VFIO_PCI | | IGD_VFIO_PCI | | | | | | | +--------------+ +---------------+ +--------------+ In this way mlx5_vfio_pci will use vfio_pci_core to register to vfio subsystem and also use the generic PCI functionality exported from it. Additionally it will add the needed vendor specific logic for HW specific features such as Live Migration. Same for the igd_vfio_pci that will add special extensions for Intel Graphics cards (GVT-d). Signed-off-by: Max Gurtovoy <mgurtovoy@xxxxxxxxxx> --- drivers/vfio/pci/Kconfig | 22 ++- drivers/vfio/pci/Makefile | 13 +- drivers/vfio/pci/vfio_pci.c | 247 ++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_core.c | 318 ++++++++----------------------- drivers/vfio/pci/vfio_pci_core.h | 113 +++++++---- 5 files changed, 423 insertions(+), 290 deletions(-) create mode 100644 drivers/vfio/pci/vfio_pci.c diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index ac3c1dd3edef..829e90a2e5a3 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -config VFIO_PCI - tristate "VFIO support for PCI devices" +config VFIO_PCI_CORE + tristate "VFIO core support for PCI devices" depends on VFIO && PCI && EVENTFD select VFIO_VIRQFD select IRQ_BYPASS_MANAGER @@ -10,9 +10,17 @@ config VFIO_PCI If you don't know what to do here, say N. +config VFIO_PCI + tristate "VFIO support for PCI devices" + depends on VFIO_PCI_CORE + help + This provides a generic PCI support using the VFIO framework. + + If you don't know what to do here, say N. + config VFIO_PCI_VGA bool "VFIO PCI support for VGA devices" - depends on VFIO_PCI && X86 && VGA_ARB + depends on VFIO_PCI_CORE && X86 && VGA_ARB help Support for VGA extension to VFIO PCI. This exposes an additional region on VGA devices for accessing legacy VGA addresses used by @@ -21,16 +29,16 @@ config VFIO_PCI_VGA If you don't know what to do here, say N. config VFIO_PCI_MMAP - depends on VFIO_PCI + depends on VFIO_PCI_CORE def_bool y if !S390 config VFIO_PCI_INTX - depends on VFIO_PCI + depends on VFIO_PCI_CORE def_bool y if !S390 config VFIO_PCI_IGD bool "VFIO PCI extensions for Intel graphics (GVT-d)" - depends on VFIO_PCI && X86 + depends on VFIO_PCI_CORE && X86 default y help Support for Intel IGD specific extensions to enable direct @@ -42,6 +50,6 @@ config VFIO_PCI_IGD config VFIO_PCI_NVLINK2 def_bool y - depends on VFIO_PCI && PPC_POWERNV + depends on VFIO_PCI_CORE && PPC_POWERNV help VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index bbf8d7c8fc45..16e7d77d63ce 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -vfio-pci-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o -vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o -vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o -vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o - +obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o + +vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-core-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o +vfio-pci-core-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o +vfio-pci-core-$(CONFIG_S390) += vfio_pci_zdev.o + +vfio-pci-y := vfio_pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c new file mode 100644 index 000000000000..447c31f4e64e --- /dev/null +++ b/drivers/vfio/pci/vfio_pci.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, Mellanox Technologies, Ltd. All rights reserved. + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@xxxxxxxxxx> + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@xxxxxxxxx + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/device.h> +#include <linux/eventfd.h> +#include <linux/file.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/uaccess.h> + +#include "vfio_pci_core.h" + +#define DRIVER_VERSION "0.2" +#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>" +#define DRIVER_DESC "VFIO PCI - User Level meta-driver" + +static char ids[1024] __initdata; +module_param_string(ids, ids, sizeof(ids), 0); +MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); + +static bool enable_sriov; +#ifdef CONFIG_PCI_IOV +module_param(enable_sriov, bool, 0644); +MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); +#endif + +static bool disable_denylist; +module_param(disable_denylist, bool, 0444); +MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); + +static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) +{ + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_QAT_C3XXX: + case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF: + case PCI_DEVICE_ID_INTEL_QAT_C62X: + case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: + return true; + default: + return false; + } + } + + return false; +} + +static bool vfio_pci_is_denylisted(struct pci_dev *pdev) +{ + if (!vfio_pci_dev_in_denylist(pdev)) + return false; + + if (disable_denylist) { + pci_warn(pdev, + "device denylist disabled - allowing device %04x:%04x.\n", + pdev->vendor, pdev->device); + return false; + } + + pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n", + pdev->vendor, pdev->device); + + return true; +} + +static void vfio_pci_release(void *device_data) +{ + struct vfio_pci_core_device *vdev = device_data; + + mutex_lock(&vdev->reflck->lock); + if (!(--vdev->refcnt)) { + vfio_pci_vf_token_user_add(vdev, -1); + vfio_pci_core_spapr_eeh_release(vdev); + vfio_pci_core_disable(vdev); + } + mutex_unlock(&vdev->reflck->lock); + + module_put(THIS_MODULE); +} + +static int vfio_pci_open(void *device_data) +{ + struct vfio_pci_core_device *vdev = device_data; + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&vdev->reflck->lock); + + if (!vdev->refcnt) { + ret = vfio_pci_core_enable(vdev); + if (ret) + goto error; + + vfio_pci_probe_mmaps(vdev); + vfio_pci_core_spapr_eeh_open(vdev); + vfio_pci_vf_token_user_add(vdev, 1); + } + vdev->refcnt++; +error: + mutex_unlock(&vdev->reflck->lock); + if (ret) + module_put(THIS_MODULE); + return ret; +} + +static const struct vfio_device_ops vfio_pci_ops = { + .name = "vfio-pci", + .open = vfio_pci_open, + .release = vfio_pci_release, + .ioctl = vfio_pci_core_ioctl, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, +}; + +static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct vfio_pci_core_device *vdev; + + if (vfio_pci_is_denylisted(pdev)) + return -EINVAL; + + vdev = vfio_create_pci_device(pdev, &vfio_pci_ops); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + return 0; +} + +static void vfio_pci_remove(struct pci_dev *pdev) +{ + vfio_destroy_pci_device(pdev); +} + +static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) +{ + might_sleep(); + + if (!enable_sriov) + return -ENOENT; + + return vfio_pci_core_sriov_configure(pdev, nr_virtfn); +} + +static struct pci_driver vfio_pci_driver = { + .name = "vfio-pci", + .id_table = NULL, /* only dynamic ids */ + .probe = vfio_pci_probe, + .remove = vfio_pci_remove, + .sriov_configure = vfio_pci_sriov_configure, + .err_handler = &vfio_pci_core_err_handlers, +}; + +static void __exit vfio_pci_cleanup(void) +{ + pci_unregister_driver(&vfio_pci_driver); +} + +static void __init vfio_pci_fill_ids(void) +{ + char *p, *id; + int rc; + + /* no ids passed actually */ + if (ids[0] == '\0') + return; + + /* add ids specified in the module parameter */ + p = ids; + while ((id = strsep(&p, ","))) { + unsigned int vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields; + + if (!strlen(id)) + continue; + + fields = sscanf(id, "%x:%x:%x:%x:%x:%x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + + if (fields < 2) { + pr_warn("invalid id string \"%s\"\n", id); + continue; + } + + rc = pci_add_dynid(&vfio_pci_driver, vendor, device, + subvendor, subdevice, class, class_mask, 0); + if (rc) + pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", + vendor, device, subvendor, subdevice, + class, class_mask, rc); + else + pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", + vendor, device, subvendor, subdevice, + class, class_mask); + } +} + +static int __init vfio_pci_init(void) +{ + int ret; + + /* Register and scan for devices */ + ret = pci_register_driver(&vfio_pci_driver); + if (ret) + return ret; + + vfio_pci_fill_ids(); + + if (disable_denylist) + pr_warn("device denylist disabled.\n"); + + return 0; +} + +module_init(vfio_pci_init); +module_exit(vfio_pci_cleanup); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 557a03528dcd..878a3609b916 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -23,7 +23,6 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> -#include <linux/vfio.h> #include <linux/vgaarb.h> #include <linux/nospec.h> #include <linux/sched/mm.h> @@ -32,11 +31,7 @@ #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>" -#define DRIVER_DESC "VFIO PCI - User Level meta-driver" - -static char ids[1024] __initdata; -module_param_string(ids, ids, sizeof(ids), 0); -MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); +#define DRIVER_DESC "core driver for VFIO based PCI devices" static bool nointxmask; module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); @@ -54,16 +49,6 @@ module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(disable_idle_d3, "Disable using the PCI D3 low power state for idle, unused devices"); -static bool enable_sriov; -#ifdef CONFIG_PCI_IOV -module_param(enable_sriov, bool, 0644); -MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); -#endif - -static bool disable_denylist; -module_param(disable_denylist, bool, 0444); -MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); - static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -73,44 +58,6 @@ static inline bool vfio_vga_disabled(void) #endif } -static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) -{ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_QAT_C3XXX: - case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF: - case PCI_DEVICE_ID_INTEL_QAT_C62X: - case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: - case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: - case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: - return true; - default: - return false; - } - } - - return false; -} - -static bool vfio_pci_is_denylisted(struct pci_dev *pdev) -{ - if (!vfio_pci_dev_in_denylist(pdev)) - return false; - - if (disable_denylist) { - pci_warn(pdev, - "device denylist disabled - allowing device %04x:%04x.\n", - pdev->vendor, pdev->device); - return false; - } - - pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n", - pdev->vendor, pdev->device); - - return true; -} - /* * Our VGA arbiter participation is limited since we don't know anything * about the device itself. However, if the device is the only VGA device @@ -155,7 +102,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev) return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; } -static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) +void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) { struct resource *res; int i; @@ -222,6 +169,7 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) vdev->bar_mmap_supported[bar] = false; } } +EXPORT_SYMBOL_GPL(vfio_pci_probe_mmaps); static void vfio_pci_try_bus_reset(struct vfio_pci_core_device *vdev); static void vfio_pci_disable(struct vfio_pci_core_device *vdev); @@ -309,7 +257,24 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat return ret; } -static int vfio_pci_enable(struct vfio_pci_core_device *vdev) +void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) +{ + vfio_pci_disable(vdev); + + mutex_lock(&vdev->igate); + if (vdev->err_trigger) { + eventfd_ctx_put(vdev->err_trigger); + vdev->err_trigger = NULL; + } + if (vdev->req_trigger) { + eventfd_ctx_put(vdev->req_trigger); + vdev->req_trigger = NULL; + } + mutex_unlock(&vdev->igate); +} +EXPORT_SYMBOL_GPL(vfio_pci_core_disable); + +int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -407,14 +372,13 @@ static int vfio_pci_enable(struct vfio_pci_core_device *vdev) } } - vfio_pci_probe_mmaps(vdev); - return 0; disable_exit: vfio_pci_disable(vdev); return ret; } +EXPORT_SYMBOL_GPL(vfio_pci_core_enable); static void vfio_pci_disable(struct vfio_pci_core_device *vdev) { @@ -515,8 +479,6 @@ static void vfio_pci_disable(struct vfio_pci_core_device *vdev) vfio_pci_set_power_state(vdev, PCI_D3hot); } -static struct pci_driver vfio_pci_driver; - static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev, struct vfio_device **pf_dev) { @@ -529,7 +491,7 @@ static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vde if (!*pf_dev) return NULL; - if (pci_dev_driver(physfn) != &vfio_pci_driver) { + if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) { vfio_device_put(*pf_dev); return NULL; } @@ -537,7 +499,7 @@ static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vde return vfio_device_data(*pf_dev); } -static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val) +void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val) { struct vfio_device *pf_dev; struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev, &pf_dev); @@ -552,60 +514,19 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int va vfio_device_put(pf_dev); } +EXPORT_SYMBOL_GPL(vfio_pci_vf_token_user_add); -static void vfio_pci_release(void *device_data) +void vfio_pci_core_spapr_eeh_open(struct vfio_pci_core_device *vdev) { - struct vfio_pci_core_device *vdev = device_data; - - mutex_lock(&vdev->reflck->lock); - - if (!(--vdev->refcnt)) { - vfio_pci_vf_token_user_add(vdev, -1); - vfio_spapr_pci_eeh_release(vdev->pdev); - vfio_pci_disable(vdev); - - mutex_lock(&vdev->igate); - if (vdev->err_trigger) { - eventfd_ctx_put(vdev->err_trigger); - vdev->err_trigger = NULL; - } - if (vdev->req_trigger) { - eventfd_ctx_put(vdev->req_trigger); - vdev->req_trigger = NULL; - } - mutex_unlock(&vdev->igate); - } - - mutex_unlock(&vdev->reflck->lock); - - module_put(THIS_MODULE); + vfio_spapr_pci_eeh_open(vdev->pdev); } +EXPORT_SYMBOL_GPL(vfio_pci_core_spapr_eeh_open); -static int vfio_pci_open(void *device_data) +void vfio_pci_core_spapr_eeh_release(struct vfio_pci_core_device *vpdev) { - struct vfio_pci_core_device *vdev = device_data; - int ret = 0; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - mutex_lock(&vdev->reflck->lock); - - if (!vdev->refcnt) { - ret = vfio_pci_enable(vdev); - if (ret) - goto error; - - vfio_spapr_pci_eeh_open(vdev->pdev); - vfio_pci_vf_token_user_add(vdev, 1); - } - vdev->refcnt++; -error: - mutex_unlock(&vdev->reflck->lock); - if (ret) - module_put(THIS_MODULE); - return ret; + vfio_spapr_pci_eeh_release(vpdev->pdev); } +EXPORT_SYMBOL_GPL(vfio_pci_core_spapr_eeh_release); static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type) { @@ -797,8 +718,8 @@ struct vfio_devices { int max_index; }; -static long vfio_pci_ioctl(void *device_data, - unsigned int cmd, unsigned long arg) +long vfio_pci_core_ioctl(void *device_data, unsigned int cmd, + unsigned long arg) { struct vfio_pci_core_device *vdev = device_data; unsigned long minsz; @@ -1401,6 +1322,7 @@ static long vfio_pci_ioctl(void *device_data, return -ENOTTY; } +EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl); static ssize_t vfio_pci_rw(void *device_data, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -1434,23 +1356,25 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, return -EINVAL; } -static ssize_t vfio_pci_read(void *device_data, char __user *buf, - size_t count, loff_t *ppos) +ssize_t vfio_pci_core_read(void *device_data, char __user *buf, size_t count, + loff_t *ppos) { if (!count) return 0; return vfio_pci_rw(device_data, buf, count, ppos, false); } +EXPORT_SYMBOL_GPL(vfio_pci_core_read); -static ssize_t vfio_pci_write(void *device_data, const char __user *buf, - size_t count, loff_t *ppos) +ssize_t vfio_pci_core_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) { if (!count) return 0; return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); } +EXPORT_SYMBOL_GPL(vfio_pci_core_write); /* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) @@ -1646,7 +1570,7 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = { .fault = vfio_pci_mmap_fault, }; -static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) +int vfio_pci_core_mmap(void *device_data, struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = device_data; struct pci_dev *pdev = vdev->pdev; @@ -1713,8 +1637,9 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_mmap); -static void vfio_pci_request(void *device_data, unsigned int count) +void vfio_pci_core_request(void *device_data, unsigned int count) { struct vfio_pci_core_device *vdev = device_data; struct pci_dev *pdev = vdev->pdev; @@ -1734,6 +1659,7 @@ static void vfio_pci_request(void *device_data, unsigned int count) mutex_unlock(&vdev->igate); } +EXPORT_SYMBOL_GPL(vfio_pci_core_request); static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, bool vf_token, uuid_t *uuid) @@ -1830,7 +1756,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, #define VF_TOKEN_ARG "vf_token=" -static int vfio_pci_match(void *device_data, char *buf) +int vfio_pci_core_match(void *device_data, char *buf) { struct vfio_pci_core_device *vdev = device_data; bool vf_token = false; @@ -1878,18 +1804,7 @@ static int vfio_pci_match(void *device_data, char *buf) return 1; /* Match */ } - -static const struct vfio_device_ops vfio_pci_ops = { - .name = "vfio-pci", - .open = vfio_pci_open, - .release = vfio_pci_release, - .ioctl = vfio_pci_ioctl, - .read = vfio_pci_read, - .write = vfio_pci_write, - .mmap = vfio_pci_mmap, - .request = vfio_pci_request, - .match = vfio_pci_match, -}; +EXPORT_SYMBOL_GPL(vfio_pci_core_match); static int vfio_pci_reflck_attach(struct vfio_pci_core_device *vdev); static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck); @@ -1908,12 +1823,12 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n", pci_name(pdev)); pdev->driver_override = kasprintf(GFP_KERNEL, "%s", - vfio_pci_ops.name); + vdev->vfio_pci_ops->name); } else if (action == BUS_NOTIFY_BOUND_DRIVER && pdev->is_virtfn && physfn == vdev->pdev) { struct pci_driver *drv = pci_dev_driver(pdev); - if (drv && drv != &vfio_pci_driver) + if (drv && drv != pci_dev_driver(vdev->pdev)) pci_warn(vdev->pdev, "VF %s bound to driver %s while PF bound to vfio-pci\n", pci_name(pdev), drv->name); @@ -1922,17 +1837,15 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, return 0; } -static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +struct vfio_pci_core_device *vfio_create_pci_device(struct pci_dev *pdev, + const struct vfio_device_ops *vfio_pci_ops) { struct vfio_pci_core_device *vdev; struct iommu_group *group; int ret; - if (vfio_pci_is_denylisted(pdev)) - return -EINVAL; - if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) - return -EINVAL; + return ERR_PTR(-EINVAL); /* * Prevent binding to PFs with VFs enabled, the VFs might be in use @@ -1944,12 +1857,12 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) */ if (pci_num_vf(pdev)) { pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n"); - return -EBUSY; + return ERR_PTR(-EBUSY); } group = vfio_iommu_group_get(&pdev->dev); if (!group) - return -EINVAL; + return ERR_PTR(-EINVAL); vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { @@ -1958,6 +1871,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } vdev->pdev = pdev; + vdev->vfio_pci_ops = vfio_pci_ops; vdev->irq_type = VFIO_PCI_NUM_IRQS; mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); @@ -1968,7 +1882,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&vdev->vma_list); init_rwsem(&vdev->memory_lock); - ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); + ret = vfio_add_group_dev(&pdev->dev, vfio_pci_ops, vdev); if (ret) goto out_free; @@ -2014,7 +1928,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vfio_pci_set_power_state(vdev, PCI_D3hot); } - return ret; + return vdev; out_vf_token: kfree(vdev->vf_token); @@ -2026,10 +1940,11 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) kfree(vdev); out_group_put: vfio_iommu_group_put(group, &pdev->dev); - return ret; + return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(vfio_create_pci_device); -static void vfio_pci_remove(struct pci_dev *pdev) +void vfio_destroy_pci_device(struct pci_dev *pdev) { struct vfio_pci_core_device *vdev; @@ -2067,9 +1982,10 @@ static void vfio_pci_remove(struct pci_dev *pdev) VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); } } +EXPORT_SYMBOL_GPL(vfio_destroy_pci_device); -static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, - pci_channel_state_t state) +static pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) { struct vfio_pci_core_device *vdev; struct vfio_device *device; @@ -2096,7 +2012,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_CAN_RECOVER; } -static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) +int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { struct vfio_pci_core_device *vdev; struct vfio_device *device; @@ -2104,9 +2020,6 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) might_sleep(); - if (!enable_sriov) - return -ENOENT; - device = vfio_device_get_from_dev(&pdev->dev); if (!device) return -ENODEV; @@ -2126,19 +2039,12 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) return ret < 0 ? ret : nr_virtfn; } +EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); -static const struct pci_error_handlers vfio_err_handlers = { - .error_detected = vfio_pci_aer_err_detected, -}; - -static struct pci_driver vfio_pci_driver = { - .name = "vfio-pci", - .id_table = NULL, /* only dynamic ids */ - .probe = vfio_pci_probe, - .remove = vfio_pci_remove, - .sriov_configure = vfio_pci_sriov_configure, - .err_handler = &vfio_err_handlers, +const struct pci_error_handlers vfio_pci_core_err_handlers = { + .error_detected = vfio_pci_core_aer_err_detected, }; +EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers); static DEFINE_MUTEX(reflck_lock); @@ -2171,13 +2077,13 @@ static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data) if (!device) return 0; - if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vdev = vfio_device_data(device); + + if (pci_dev_driver(pdev) != pci_dev_driver(vdev->pdev)) { vfio_device_put(device); return 0; } - vdev = vfio_device_data(device); - if (vdev->reflck) { vfio_pci_reflck_get(vdev->reflck); *preflck = vdev->reflck; @@ -2233,13 +2139,13 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) if (!device) return -EINVAL; - if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vdev = vfio_device_data(device); + + if (pci_dev_driver(pdev) != pci_dev_driver(vdev->pdev)) { vfio_device_put(device); return -EBUSY; } - vdev = vfio_device_data(device); - /* Fault if the device is not unused */ if (vdev->refcnt) { vfio_device_put(device); @@ -2263,13 +2169,13 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) if (!device) return -EINVAL; - if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vdev = vfio_device_data(device); + + if (pci_dev_driver(pdev) != pci_dev_driver(vdev->pdev)) { vfio_device_put(device); return -EBUSY; } - vdev = vfio_device_data(device); - /* * Locking multiple devices is prone to deadlock, runaway and * unwind if we hit contention. @@ -2358,81 +2264,19 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_core_device *vdev) kfree(devs.devices); } -static void __exit vfio_pci_cleanup(void) +static void __exit vfio_pci_core_cleanup(void) { - pci_unregister_driver(&vfio_pci_driver); vfio_pci_uninit_perm_bits(); } -static void __init vfio_pci_fill_ids(void) +static int __init vfio_pci_core_init(void) { - char *p, *id; - int rc; - - /* no ids passed actually */ - if (ids[0] == '\0') - return; - - /* add ids specified in the module parameter */ - p = ids; - while ((id = strsep(&p, ","))) { - unsigned int vendor, device, subvendor = PCI_ANY_ID, - subdevice = PCI_ANY_ID, class = 0, class_mask = 0; - int fields; - - if (!strlen(id)) - continue; - - fields = sscanf(id, "%x:%x:%x:%x:%x:%x", - &vendor, &device, &subvendor, &subdevice, - &class, &class_mask); - - if (fields < 2) { - pr_warn("invalid id string \"%s\"\n", id); - continue; - } - - rc = pci_add_dynid(&vfio_pci_driver, vendor, device, - subvendor, subdevice, class, class_mask, 0); - if (rc) - pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", - vendor, device, subvendor, subdevice, - class, class_mask, rc); - else - pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", - vendor, device, subvendor, subdevice, - class, class_mask); - } -} - -static int __init vfio_pci_init(void) -{ - int ret; - /* Allocate shared config space permision data used by all devices */ - ret = vfio_pci_init_perm_bits(); - if (ret) - return ret; - - /* Register and scan for devices */ - ret = pci_register_driver(&vfio_pci_driver); - if (ret) - goto out_driver; - - vfio_pci_fill_ids(); - - if (disable_denylist) - pr_warn("device denylist disabled.\n"); - - return 0; - -out_driver: - vfio_pci_uninit_perm_bits(); - return ret; + return vfio_pci_init_perm_bits(); } -module_init(vfio_pci_init); -module_exit(vfio_pci_cleanup); +module_init(vfio_pci_core_init); +module_exit(vfio_pci_core_cleanup); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h index 3964ca898984..a3517a9472bd 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -10,6 +10,7 @@ #include <linux/mutex.h> #include <linux/pci.h> +#include <linux/vfio.h> #include <linux/irqbypass.h> #include <linux/types.h> #include <linux/uuid.h> @@ -100,48 +101,52 @@ struct vfio_pci_mmap_vma { }; struct vfio_pci_core_device { - struct pci_dev *pdev; - void __iomem *barmap[PCI_STD_NUM_BARS]; - bool bar_mmap_supported[PCI_STD_NUM_BARS]; - u8 *pci_config_map; - u8 *vconfig; - struct perm_bits *msi_perm; - spinlock_t irqlock; - struct mutex igate; - struct vfio_pci_irq_ctx *ctx; - int num_ctx; - int irq_type; - int num_regions; - struct vfio_pci_region *region; - u8 msi_qmax; - u8 msix_bar; - u16 msix_size; - u32 msix_offset; - u32 rbar[7]; - bool pci_2_3; - bool virq_disabled; - bool reset_works; - bool extended_caps; - bool bardirty; - bool has_vga; - bool needs_reset; - bool nointx; - bool needs_pm_restore; - struct pci_saved_state *pci_saved_state; - struct pci_saved_state *pm_save; - struct vfio_pci_reflck *reflck; - int refcnt; - int ioeventfds_nr; - struct eventfd_ctx *err_trigger; - struct eventfd_ctx *req_trigger; - struct list_head dummy_resources_list; - struct mutex ioeventfds_lock; - struct list_head ioeventfds_list; + /* below are the public fields used by vfio_pci drivers */ + struct pci_dev *pdev; + const struct vfio_device_ops *vfio_pci_ops; + struct vfio_pci_reflck *reflck; + int refcnt; + struct vfio_pci_region *region; + u8 *pci_config_map; + u8 *vconfig; + + /* below are the private internal fields used by vfio_pci_core */ + void __iomem *barmap[PCI_STD_NUM_BARS]; + bool bar_mmap_supported[PCI_STD_NUM_BARS]; + struct perm_bits *msi_perm; + spinlock_t irqlock; + struct mutex igate; + struct vfio_pci_irq_ctx *ctx; + int num_ctx; + int irq_type; + int num_regions; + u8 msi_qmax; + u8 msix_bar; + u16 msix_size; + u32 msix_offset; + u32 rbar[7]; + bool pci_2_3; + bool virq_disabled; + bool reset_works; + bool extended_caps; + bool bardirty; + bool has_vga; + bool needs_reset; + bool nointx; + bool needs_pm_restore; + struct pci_saved_state *pci_saved_state; + struct pci_saved_state *pm_save; + int ioeventfds_nr; + struct eventfd_ctx *err_trigger; + struct eventfd_ctx *req_trigger; + struct list_head dummy_resources_list; + struct mutex ioeventfds_lock; + struct list_head ioeventfds_list; struct vfio_pci_vf_token *vf_token; - struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; - struct rw_semaphore memory_lock; + struct notifier_block nb; + struct mutex vma_lock; + struct list_head vma_list; + struct rw_semaphore memory_lock; }; #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) @@ -225,4 +230,30 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, } #endif +/* Exported functions */ +struct vfio_pci_core_device *vfio_create_pci_device(struct pci_dev *pdev, + const struct vfio_device_ops *vfio_pci_ops); +void vfio_destroy_pci_device(struct pci_dev *pdev); + +long vfio_pci_core_ioctl(void *device_data, unsigned int cmd, + unsigned long arg); +ssize_t vfio_pci_core_read(void *device_data, char __user *buf, size_t count, + loff_t *ppos); +ssize_t vfio_pci_core_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos); +int vfio_pci_core_mmap(void *device_data, struct vm_area_struct *vma); +void vfio_pci_core_request(void *device_data, unsigned int count); +int vfio_pci_core_match(void *device_data, char *buf); + +void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); +int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); +void vfio_pci_core_spapr_eeh_open(struct vfio_pci_core_device *vdev); +void vfio_pci_core_spapr_eeh_release(struct vfio_pci_core_device *vdev); +void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val); +void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev); + +int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn); + +extern const struct pci_error_handlers vfio_pci_core_err_handlers; + #endif /* VFIO_PCI_CORE_H */ -- 2.25.4