> From: Alex Williamson > Sent: Thursday, February 20, 2020 2:54 AM > > With the VF Token interface we can now expect that a vfio userspace > driver must be in collaboration with the PF driver, an unwitting > userspace driver will not be able to get past the GET_DEVICE_FD step > in accessing the device. We can now move on to actually allowing > SR-IOV to be enabled by vfio-pci on the PF. Support for this is not > enabled by default in this commit, but it does provide a module option > for this to be enabled (enable_sriov=1). Enabling VFs is rather > straightforward, except we don't want to risk that a VF might get > autoprobed and bound to other drivers, so a bus notifier is used to > "capture" VFs to vfio-pci using the driver_override support. We > assume any later action to bind the device to other drivers is > condoned by the system admin and allow it with a log warning. > > vfio-pci will disable SR-IOV on a PF before releasing the device, > allowing a VF driver to be assured other drivers cannot take over the > PF and that any other userspace driver must know the shared VF token. > This support also does not provide a mechanism for the PF userspace > driver itself to manipulate SR-IOV through the vfio API. With this > patch SR-IOV can only be enabled via the host sysfs interface and the > PF driver user cannot create or remove VFs. I'm not sure how many devices can be properly configured simply with pci_enable_sriov. It is not unusual to require PF driver prepare something before turning PCI SR-IOV capability. If you look kernel PF drivers, there are only two using generic pci_sriov_configure_ simple (simple wrapper like pci_enable_sriov), while most others implementing their own callback. However vfio itself has no idea thus I'm not sure how an user knows whether using this option can actually meet his purpose. I may miss something here, possibly using DPDK as an example will make it clearer. > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > --- > drivers/vfio/pci/vfio_pci.c | 106 +++++++++++++++++++++++++++++++-- > -- > drivers/vfio/pci/vfio_pci_private.h | 2 + > 2 files changed, 97 insertions(+), 11 deletions(-) > > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > index e4d5d26e5e71..b40ade48a844 100644 > --- a/drivers/vfio/pci/vfio_pci.c > +++ b/drivers/vfio/pci/vfio_pci.c > @@ -54,6 +54,12 @@ module_param(disable_idle_d3, bool, S_IRUGO | > S_IWUSR); > MODULE_PARM_DESC(disable_idle_d3, > "Disable using the PCI D3 low power state for idle, unused > devices"); > > +static bool enable_sriov; > +#ifdef CONFIG_PCI_IOV > +module_param(enable_sriov, bool, 0644); > +MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV > configuration"); > +#endif > + > static inline bool vfio_vga_disabled(void) > { > #ifdef CONFIG_VFIO_PCI_VGA > @@ -1528,6 +1534,35 @@ static const struct vfio_device_ops vfio_pci_ops = > { > > static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev); > static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck); > +static struct pci_driver vfio_pci_driver; > + > +static int vfio_pci_bus_notifier(struct notifier_block *nb, > + unsigned long action, void *data) > +{ > + struct vfio_pci_device *vdev = container_of(nb, > + struct vfio_pci_device, nb); > + struct device *dev = data; > + struct pci_dev *pdev = to_pci_dev(dev); > + struct pci_dev *physfn = pci_physfn(pdev); > + > + if (action == BUS_NOTIFY_ADD_DEVICE && > + pdev->is_virtfn && physfn == vdev->pdev) { > + pci_info(vdev->pdev, "Captured SR-IOV VF %s > driver_override\n", > + pci_name(pdev)); > + pdev->driver_override = kasprintf(GFP_KERNEL, "%s", > + vfio_pci_ops.name); > + } else if (action == BUS_NOTIFY_BOUND_DRIVER && > + pdev->is_virtfn && physfn == vdev->pdev) { > + struct pci_driver *drv = pci_dev_driver(pdev); > + > + if (drv && drv != &vfio_pci_driver) > + pci_warn(vdev->pdev, > + "VF %s bound to driver %s while PF bound to > vfio-pci\n", > + pci_name(pdev), drv->name); > + } > + > + return 0; > +} > > static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > @@ -1539,12 +1574,12 @@ static int vfio_pci_probe(struct pci_dev *pdev, > const struct pci_device_id *id) > return -EINVAL; > > /* > - * Prevent binding to PFs with VFs enabled, this too easily allows > - * userspace instance with VFs and PFs from the same device, which > - * cannot work. Disabling SR-IOV here would initiate removing the > - * VFs, which would unbind the driver, which is prone to blocking > - * if that VF is also in use by vfio-pci. Just reject these PFs > - * and let the user sort it out. > + * Prevent binding to PFs with VFs enabled, the VFs might be in use > + * by the host or other users. We cannot capture the VFs if they > + * already exist, nor can we track VF users. Disabling SR-IOV here > + * would initiate removing the VFs, which would unbind the driver, > + * which is prone to blocking if that VF is also in use by vfio-pci. > + * Just reject these PFs and let the user sort it out. > */ > if (pci_num_vf(pdev)) { > pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n"); > @@ -1592,6 +1627,18 @@ static int vfio_pci_probe(struct pci_dev *pdev, > const struct pci_device_id *id) > kfree(vdev); > return -ENOMEM; > } > + > + vdev->nb.notifier_call = vfio_pci_bus_notifier; > + ret = bus_register_notifier(&pci_bus_type, &vdev->nb); > + if (ret) { > + kfree(vdev->vf_token); > + vfio_pci_reflck_put(vdev->reflck); > + vfio_del_group_dev(&pdev->dev); > + vfio_iommu_group_put(group, &pdev->dev); > + kfree(vdev); > + return ret; > + } > + > mutex_init(&vdev->vf_token->lock); > uuid_gen(&vdev->vf_token->uuid); > } > @@ -1625,6 +1672,8 @@ static void vfio_pci_remove(struct pci_dev *pdev) > { > struct vfio_pci_device *vdev; > > + pci_disable_sriov(pdev); > + > vdev = vfio_del_group_dev(&pdev->dev); > if (!vdev) > return; > @@ -1635,6 +1684,9 @@ static void vfio_pci_remove(struct pci_dev *pdev) > kfree(vdev->vf_token); > } > > + if (vdev->nb.notifier_call) > + bus_unregister_notifier(&pci_bus_type, &vdev->nb); > + > vfio_pci_reflck_put(vdev->reflck); > > vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); > @@ -1683,16 +1735,48 @@ static pci_ers_result_t > vfio_pci_aer_err_detected(struct pci_dev *pdev, > return PCI_ERS_RESULT_CAN_RECOVER; > } > > +static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) > +{ > + struct vfio_pci_device *vdev; > + struct vfio_device *device; > + int ret = 0; > + > + might_sleep(); > + > + if (!enable_sriov) > + return -ENOENT; > + > + device = vfio_device_get_from_dev(&pdev->dev); > + if (!device) > + return -ENODEV; > + > + vdev = vfio_device_data(device); > + if (!vdev) { > + vfio_device_put(device); > + return -ENODEV; > + } > + > + if (nr_virtfn == 0) > + pci_disable_sriov(pdev); > + else > + ret = pci_enable_sriov(pdev, nr_virtfn); > + > + vfio_device_put(device); > + > + return ret < 0 ? ret : nr_virtfn; > +} > + > static const struct pci_error_handlers vfio_err_handlers = { > .error_detected = vfio_pci_aer_err_detected, > }; > > static struct pci_driver vfio_pci_driver = { > - .name = "vfio-pci", > - .id_table = NULL, /* only dynamic ids */ > - .probe = vfio_pci_probe, > - .remove = vfio_pci_remove, > - .err_handler = &vfio_err_handlers, > + .name = "vfio-pci", > + .id_table = NULL, /* only dynamic ids */ > + .probe = vfio_pci_probe, > + .remove = vfio_pci_remove, > + .sriov_configure = vfio_pci_sriov_configure, > + .err_handler = &vfio_err_handlers, > }; > > static DEFINE_MUTEX(reflck_lock); > diff --git a/drivers/vfio/pci/vfio_pci_private.h > b/drivers/vfio/pci/vfio_pci_private.h > index 76c11c915949..36ec69081ecd 100644 > --- a/drivers/vfio/pci/vfio_pci_private.h > +++ b/drivers/vfio/pci/vfio_pci_private.h > @@ -13,6 +13,7 @@ > #include <linux/irqbypass.h> > #include <linux/types.h> > #include <linux/uuid.h> > +#include <linux/notifier.h> > > #ifndef VFIO_PCI_PRIVATE_H > #define VFIO_PCI_PRIVATE_H > @@ -130,6 +131,7 @@ struct vfio_pci_device { > struct mutex ioeventfds_lock; > struct list_head ioeventfds_list; > struct vfio_pci_vf_token *vf_token; > + struct notifier_block nb; > }; > > #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)