This driver registers with both the PCI bus and the Auxiliary bus. If the probes of both devices succeed, we'll have a vendor-specific VFIO PCI device. mlx5_vfio_pci uses vfio_pci_core to register and create a VFIO device, and uses the auxiliary_device to get the needed extensions from the vendor device driver. If either of the probe() functions fails, the VFIO char device will not be created. For now, the driver only registers the auxiliary_device and binds it to the pci_device when the auxiliary_device id matches the pci_device BDF. Later, vendor-specific features such as live migration will be added and made available to the virtualization software. Note: Although we've created mlx5-vfio-pci.ko, binding to vfio-pci.ko will still work as before; it is fully backward compatible. Of course, the extended vendor functionality will not be available if the device is bound to the generic vfio-pci.ko. Signed-off-by: Max Gurtovoy <mgurtovoy@xxxxxxxxxx> --- drivers/vfio/pci/Kconfig | 10 ++ drivers/vfio/pci/Makefile | 3 + drivers/vfio/pci/mlx5_vfio_pci.c | 253 +++++++++++++++++++++++++++++++ include/linux/mlx5/vfio_pci.h | 36 +++++ 4 files changed, 302 insertions(+) create mode 100644 drivers/vfio/pci/mlx5_vfio_pci.c create mode 100644 include/linux/mlx5/vfio_pci.h diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5f90be27fba0..2133cd2f9c92 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -65,3 +65,13 @@ config VFIO_PCI_ZDEV for zPCI devices passed through via VFIO on s390. Say Y here. + +config MLX5_VFIO_PCI + tristate "VFIO support for MLX5 PCI devices" + depends on VFIO_PCI_CORE && MLX5_CORE + select AUXILIARY_BUS + help + This provides a generic PCI support for MLX5 devices using the VFIO + framework. + + If you don't know what to do here, say N. 
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 3f2a27e222cd..9f67edca31c5 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o +obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5-vfio-pci.o vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-core-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o @@ -9,3 +10,5 @@ vfio-pci-core-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o vfio-pci-y := vfio_pci.o + +mlx5-vfio-pci-y := mlx5_vfio_pci.o diff --git a/drivers/vfio/pci/mlx5_vfio_pci.c b/drivers/vfio/pci/mlx5_vfio_pci.c new file mode 100644 index 000000000000..98cc2d037b0d --- /dev/null +++ b/drivers/vfio/pci/mlx5_vfio_pci.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Author: Max Gurtovoy <mgurtovoy@xxxxxxxxxx> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/device.h> +#include <linux/eventfd.h> +#include <linux/file.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/pm_runtime.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> +#include <linux/sched/mm.h> +#include <linux/mlx5/vfio_pci.h> + +#include "vfio_pci_private.h" + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Max Gurtovoy <mgurtovoy@xxxxxxxxxx>" +#define DRIVER_DESC "MLX5 VFIO PCI - User Level meta-driver for NVIDIA MLX5 device family" + +static LIST_HEAD(aux_devs_list); +static DEFINE_MUTEX(aux_devs_lock); + +static struct mlx5_vfio_pci_adev *mlx5_vfio_pci_find_adev(struct pci_dev *pdev) +{ + struct mlx5_vfio_pci_adev *mvadev, *found = NULL; + + mutex_lock(&aux_devs_lock); + list_for_each_entry(mvadev, 
&aux_devs_list, entry) { + if (mvadev->madev.adev.id == pci_dev_id(pdev)) { + found = mvadev; + break; + } + } + mutex_unlock(&aux_devs_lock); + + return found; +} + +static int mlx5_vfio_pci_aux_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_vfio_pci_adev *mvadev; + + mvadev = adev_to_mvadev(adev); + + pr_info("%s aux probing bdf %02x:%02x.%d mdev is %s\n", + adev->name, + PCI_BUS_NUM(adev->id & 0xffff), + PCI_SLOT(adev->id & 0xff), + PCI_FUNC(adev->id & 0xff), dev_name(mvadev->madev.mdev->device)); + + mutex_lock(&aux_devs_lock); + list_add(&mvadev->entry, &aux_devs_list); + mutex_unlock(&aux_devs_lock); + + return 0; +} + +static void mlx5_vfio_pci_aux_remove(struct auxiliary_device *adev) +{ + struct mlx5_vfio_pci_adev *mvadev = adev_to_mvadev(adev); + struct vfio_pci_device *vdev = dev_get_drvdata(&adev->dev); + + /* TODO: is this the right thing to do ? maybe FLR ? */ + if (vdev) + pci_reset_function(vdev->pdev); + + mutex_lock(&aux_devs_lock); + list_del(&mvadev->entry); + mutex_unlock(&aux_devs_lock); +} + +static const struct auxiliary_device_id mlx5_vfio_pci_aux_id_table[] = { + { .name = MLX5_ADEV_NAME ".vfio_pci", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_vfio_pci_aux_id_table); + +static struct auxiliary_driver mlx5_vfio_pci_aux_driver = { + .name = "vfio_pci_ex", + .probe = mlx5_vfio_pci_aux_probe, + .remove = mlx5_vfio_pci_aux_remove, + .id_table = mlx5_vfio_pci_aux_id_table, +}; + +static struct pci_driver mlx5_vfio_pci_driver; + +static ssize_t mlx5_vfio_pci_write(void *device_data, + const char __user *buf, size_t count, loff_t *ppos) +{ + /* DO vendor specific stuff here ? */ + + return vfio_pci_core_write(device_data, buf, count, ppos); +} + +static ssize_t mlx5_vfio_pci_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + /* DO vendor specific stuff here ? 
*/ + + return vfio_pci_core_read(device_data, buf, count, ppos); +} + +static long mlx5_vfio_pci_ioctl(void *device_data, unsigned int cmd, + unsigned long arg) +{ + /* DO vendor specific stuff here ? */ + + return vfio_pci_core_ioctl(device_data, cmd, arg); +} + +static void mlx5_vfio_pci_release(void *device_data) +{ + /* DO vendor specific stuff here ? */ + + vfio_pci_core_release(device_data); +} + +static int mlx5_vfio_pci_open(void *device_data) +{ + /* DO vendor specific stuff here ? */ + + return vfio_pci_core_open(device_data); +} + +static const struct vfio_device_ops mlx5_vfio_pci_ops = { + .name = "mlx5-vfio-pci", + .open = mlx5_vfio_pci_open, + .release = mlx5_vfio_pci_release, + .ioctl = mlx5_vfio_pci_ioctl, + .read = mlx5_vfio_pci_read, + .write = mlx5_vfio_pci_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, +}; + +static int mlx5_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct vfio_pci_device *vdev; + struct mlx5_vfio_pci_adev *mvadev; + + mvadev = mlx5_vfio_pci_find_adev(pdev); + if (!mvadev) { + pr_err("failed to find aux device for %s\n", + dev_name(&pdev->dev)); + return -ENODEV; + } + + vdev = vfio_create_pci_device(pdev, &mlx5_vfio_pci_ops, mvadev); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + dev_set_drvdata(&mvadev->madev.adev.dev, vdev); + return 0; +} + +static void mlx5_vfio_pci_remove(struct pci_dev *pdev) +{ + struct mlx5_vfio_pci_adev *mvadev; + + mvadev = mlx5_vfio_pci_find_adev(pdev); + if (mvadev) + dev_set_drvdata(&mvadev->madev.adev.dev, NULL); + + vfio_destroy_pci_device(pdev); +} + +static pci_ers_result_t mlx5_vfio_pci_aer_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + /* DO vendor specific stuff here ? 
*/ + + return vfio_pci_core_aer_err_detected(pdev, state); +} + +#ifdef CONFIG_PCI_IOV +static int mlx5_vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) +{ + might_sleep(); + + /* DO vendor specific stuff here */ + + return vfio_pci_core_sriov_configure(pdev, nr_virtfn); +} +#endif + +static const struct pci_error_handlers mlx5_vfio_err_handlers = { + .error_detected = mlx5_vfio_pci_aer_err_detected, +}; + +static const struct pci_device_id mlx5_vfio_pci_table[] = { + { PCI_VDEVICE(MELLANOX, 0x6001) }, /* NVMe SNAP controllers */ + { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1042, + PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID) }, /* Virtio SNAP controllers */ + { 0, } +}; + +static struct pci_driver mlx5_vfio_pci_driver = { + .name = "mlx5-vfio-pci", + .id_table = mlx5_vfio_pci_table, + .probe = mlx5_vfio_pci_probe, + .remove = mlx5_vfio_pci_remove, +#ifdef CONFIG_PCI_IOV + .sriov_configure = mlx5_vfio_pci_sriov_configure, +#endif + .err_handler = &mlx5_vfio_err_handlers, +}; + +static void __exit mlx5_vfio_pci_cleanup(void) +{ + auxiliary_driver_unregister(&mlx5_vfio_pci_aux_driver); + pci_unregister_driver(&mlx5_vfio_pci_driver); +} + +static int __init mlx5_vfio_pci_init(void) +{ + int ret; + + ret = pci_register_driver(&mlx5_vfio_pci_driver); + if (ret) + return ret; + + ret = auxiliary_driver_register(&mlx5_vfio_pci_aux_driver); + if (ret) + goto out_unregister; + + return 0; + +out_unregister: + pci_unregister_driver(&mlx5_vfio_pci_driver); + return ret; +} + +module_init(mlx5_vfio_pci_init); +module_exit(mlx5_vfio_pci_cleanup); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/linux/mlx5/vfio_pci.h b/include/linux/mlx5/vfio_pci.h new file mode 100644 index 000000000000..c1e7b4d6da30 --- /dev/null +++ b/include/linux/mlx5/vfio_pci.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020 NVIDIA Corporation + 
*/ + +#ifndef _VFIO_PCI_H +#define _VFIO_PCI_H + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/auxiliary_bus.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/driver.h> + +struct mlx5_vfio_pci_adev { + struct mlx5_adev madev; + + /* These fields should not be used outside mlx5_vfio_pci.ko */ + struct list_head entry; +}; + +static inline struct mlx5_vfio_pci_adev* +madev_to_mvadev(struct mlx5_adev *madev) +{ + return container_of(madev, struct mlx5_vfio_pci_adev, madev); +} + +static inline struct mlx5_vfio_pci_adev* +adev_to_mvadev(struct auxiliary_device *adev) +{ + struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); + + return madev_to_mvadev(madev); +} + +#endif -- 2.25.4