On Tue, 21 Nov 2023 02:51:11 +0000 Yahui Cao <yahui.cao@xxxxxxxxx> wrote: > From: Lingyu Liu <lingyu.liu@xxxxxxxxx> > > Add a vendor-specific vfio_pci driver for E800 devices. > > It uses vfio_pci_core to register to the VFIO subsystem and then > implements the E800 specific logic to support VF live migration. > > It implements the device state transition flow for live > migration. > > Signed-off-by: Lingyu Liu <lingyu.liu@xxxxxxxxx> > Signed-off-by: Yahui Cao <yahui.cao@xxxxxxxxx> > --- > MAINTAINERS | 7 + > drivers/vfio/pci/Kconfig | 2 + > drivers/vfio/pci/Makefile | 2 + > drivers/vfio/pci/ice/Kconfig | 10 + > drivers/vfio/pci/ice/Makefile | 4 + > drivers/vfio/pci/ice/ice_vfio_pci.c | 707 ++++++++++++++++++++++++++++ > 6 files changed, 732 insertions(+) > create mode 100644 drivers/vfio/pci/ice/Kconfig > create mode 100644 drivers/vfio/pci/ice/Makefile > create mode 100644 drivers/vfio/pci/ice/ice_vfio_pci.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index 97f51d5ec1cf..c8faf7fe1bd1 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -22860,6 +22860,13 @@ L: kvm@xxxxxxxxxxxxxxx > S: Maintained > F: drivers/vfio/pci/mlx5/ > > +VFIO ICE PCI DRIVER > +M: Yahui Cao <yahui.cao@xxxxxxxxx> > +M: Lingyu Liu <lingyu.liu@xxxxxxxxx> > +L: kvm@xxxxxxxxxxxxxxx > +S: Maintained > +F: drivers/vfio/pci/ice/ > + > VFIO PCI DEVICE SPECIFIC DRIVERS > R: Jason Gunthorpe <jgg@xxxxxxxxxx> > R: Yishai Hadas <yishaih@xxxxxxxxxx> > diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig > index 8125e5f37832..6618208947af 100644 > --- a/drivers/vfio/pci/Kconfig > +++ b/drivers/vfio/pci/Kconfig > @@ -65,4 +65,6 @@ source "drivers/vfio/pci/hisilicon/Kconfig" > > source "drivers/vfio/pci/pds/Kconfig" > > +source "drivers/vfio/pci/ice/Kconfig" > + > endmenu > diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile > index 45167be462d8..fc1df82df3ac 100644 > --- a/drivers/vfio/pci/Makefile > +++ b/drivers/vfio/pci/Makefile > @@ -13,3 +13,5 @@ obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/ > obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/ > > obj-$(CONFIG_PDS_VFIO_PCI) += pds/ > + > +obj-$(CONFIG_ICE_VFIO_PCI) += ice/ > diff --git a/drivers/vfio/pci/ice/Kconfig b/drivers/vfio/pci/ice/Kconfig > new file mode 100644 > index 000000000000..0b8cd1489073 > --- /dev/null > +++ b/drivers/vfio/pci/ice/Kconfig > @@ -0,0 +1,10 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +config ICE_VFIO_PCI > + tristate "VFIO support for Intel(R) Ethernet Connection E800 Series" > + depends on ICE > + select VFIO_PCI_CORE > + help > + This provides migration support for Intel(R) Ethernet connection E800 > + series devices using the VFIO framework. > + > + If you don't know what to do here, say N. > diff --git a/drivers/vfio/pci/ice/Makefile b/drivers/vfio/pci/ice/Makefile > new file mode 100644 > index 000000000000..259d4ab89105 > --- /dev/null > +++ b/drivers/vfio/pci/ice/Makefile > @@ -0,0 +1,4 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +obj-$(CONFIG_ICE_VFIO_PCI) += ice-vfio-pci.o > +ice-vfio-pci-y := ice_vfio_pci.o > + > diff --git a/drivers/vfio/pci/ice/ice_vfio_pci.c b/drivers/vfio/pci/ice/ice_vfio_pci.c > new file mode 100644 > index 000000000000..28a181aa2f3f > --- /dev/null > +++ b/drivers/vfio/pci/ice/ice_vfio_pci.c > @@ -0,0 +1,707 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (C) 2018-2023 Intel Corporation */ > + > +#include <linux/device.h> > +#include <linux/module.h> > +#include <linux/types.h> > +#include <linux/file.h> > +#include <linux/pci.h> > +#include <linux/vfio_pci_core.h> > +#include <linux/net/intel/ice_migration.h> > +#include <linux/anon_inodes.h> > + > +#define DRIVER_DESC "ICE VFIO PCI - User Level meta-driver for Intel E800 device family" > + > +struct ice_vfio_pci_migration_file { > + struct file *filp; > + struct mutex lock; /* protect migration file access */ > + bool disabled; > + > + u8 mig_data[SZ_128K]; > + size_t total_length; > +}; > + > +struct ice_vfio_pci_core_device { > + struct vfio_pci_core_device core_device; > + u8 deferred_reset:1; Move vf_id here to use some of the hole this leaves. > + struct mutex state_mutex; /* protect migration state */ > + enum vfio_device_mig_state mig_state; > + /* protect the reset_done flow */ > + spinlock_t reset_lock; > + struct ice_vfio_pci_migration_file *resuming_migf; > + struct ice_vfio_pci_migration_file *saving_migf; > + struct vfio_device_migration_info mig_info; > + u8 *mig_data; > + struct ice_pf *pf; > + int vf_id; > +}; > + > +/** > + * ice_vfio_pci_load_state - VFIO device state reloading > + * @ice_vdev: pointer to ice vfio pci core device structure > + * > + * Load device state. This function is called when the userspace VFIO uAPI > + * consumer wants to load the device state info from VFIO migration region and > + * load them into the device. This function should make sure all the device > + * state info is loaded successfully. As a result, return value is mandatory > + * to be checked. > + * > + * Return 0 for success, negative value for failure. > + */ > +static int __must_check > +ice_vfio_pci_load_state(struct ice_vfio_pci_core_device *ice_vdev) > +{ > + struct ice_vfio_pci_migration_file *migf = ice_vdev->resuming_migf; > + > + return ice_migration_load_devstate(ice_vdev->pf, > + ice_vdev->vf_id, > + migf->mig_data, > + migf->total_length); > +} > + > +/** > + * ice_vfio_pci_save_state - VFIO device state saving > + * @ice_vdev: pointer to ice vfio pci core device structure > + * @migf: pointer to migration file > + * > + * Snapshot the device state and save it. This function is called when the > + * VFIO uAPI consumer wants to snapshot the current device state and saves > + * it into the VFIO migration region. This function should make sure all > + * of the device state info is collectted and saved successfully. As a > + * result, return value is mandatory to be checked. > + * > + * Return 0 for success, negative value for failure. > + */ > +static int __must_check > +ice_vfio_pci_save_state(struct ice_vfio_pci_core_device *ice_vdev, > + struct ice_vfio_pci_migration_file *migf) > +{ > + migf->total_length = SZ_128K; > + > + return ice_migration_save_devstate(ice_vdev->pf, > + ice_vdev->vf_id, > + migf->mig_data, > + migf->total_length); > +} > + > +/** > + * ice_vfio_migration_init - Initialization for live migration function > + * @ice_vdev: pointer to ice vfio pci core device structure > + * > + * Returns 0 on success, negative value on error > + */ > +static int ice_vfio_migration_init(struct ice_vfio_pci_core_device *ice_vdev) > +{ > + struct pci_dev *pdev = ice_vdev->core_device.pdev; > + > + ice_vdev->pf = ice_migration_get_pf(pdev); > + if (!ice_vdev->pf) > + return -EFAULT; > + > + ice_vdev->vf_id = pci_iov_vf_id(pdev); > + if (ice_vdev->vf_id < 0) > + return -EINVAL; > + > + return ice_migration_init_dev(ice_vdev->pf, ice_vdev->vf_id); > +} > + > +/** > + * ice_vfio_migration_uninit - Cleanup for live migration function > + * @ice_vdev: pointer to ice vfio pci core device structure > + */ > +static void ice_vfio_migration_uninit(struct ice_vfio_pci_core_device *ice_vdev) > +{ > + ice_migration_uninit_dev(ice_vdev->pf, ice_vdev->vf_id); > +} > + > +/** > + * ice_vfio_pci_disable_fd - Close migration file > + * @migf: pointer to ice vfio pci migration file > + */ > +static void ice_vfio_pci_disable_fd(struct ice_vfio_pci_migration_file *migf) > +{ > + mutex_lock(&migf->lock); > + migf->disabled = true; > + migf->total_length = 0; > + migf->filp->f_pos = 0; > + mutex_unlock(&migf->lock); > +} > + > +/** > + * ice_vfio_pci_disable_fds - Close migration files of ice vfio pci device > + * @ice_vdev: pointer to ice vfio pci core device structure > + */ > +static void ice_vfio_pci_disable_fds(struct ice_vfio_pci_core_device *ice_vdev) > +{ > + if (ice_vdev->resuming_migf) { > + ice_vfio_pci_disable_fd(ice_vdev->resuming_migf); > + fput(ice_vdev->resuming_migf->filp); > + ice_vdev->resuming_migf = NULL; > + } > + if (ice_vdev->saving_migf) { > + ice_vfio_pci_disable_fd(ice_vdev->saving_migf); > + fput(ice_vdev->saving_migf->filp); > + ice_vdev->saving_migf = NULL; > + } > +} > + > +/* > + * This function is called in all state_mutex unlock cases to > + * handle a 'deferred_reset' if exists. > + * @ice_vdev: pointer to ice vfio pci core device structure > + */ > +static void > +ice_vfio_pci_state_mutex_unlock(struct ice_vfio_pci_core_device *ice_vdev) > +{ > +again: > + spin_lock(&ice_vdev->reset_lock); > + if (ice_vdev->deferred_reset) { > + ice_vdev->deferred_reset = false; > + spin_unlock(&ice_vdev->reset_lock); > + ice_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; > + ice_vfio_pci_disable_fds(ice_vdev); > + goto again; > + } > + mutex_unlock(&ice_vdev->state_mutex); > + spin_unlock(&ice_vdev->reset_lock); > +} > + > +static void ice_vfio_pci_reset_done(struct pci_dev *pdev) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = > + (struct ice_vfio_pci_core_device *)dev_get_drvdata(&pdev->dev); > + > + /* > + * As the higher VFIO layers are holding locks across reset and using > + * those same locks with the mm_lock we need to prevent ABBA deadlock > + * with the state_mutex and mm_lock. > + * In case the state_mutex was taken already we defer the cleanup work > + * to the unlock flow of the other running context. > + */ > + spin_lock(&ice_vdev->reset_lock); > + ice_vdev->deferred_reset = true; > + if (!mutex_trylock(&ice_vdev->state_mutex)) { > + spin_unlock(&ice_vdev->reset_lock); > + return; > + } > + spin_unlock(&ice_vdev->reset_lock); > + ice_vfio_pci_state_mutex_unlock(ice_vdev); > +} > + > +/** > + * ice_vfio_pci_open_device - Called when a vfio device is probed by VFIO UAPI > + * @core_vdev: the vfio device to open > + * > + * Initialization of the vfio device > + * > + * Returns 0 on success, negative value on error > + */ > +static int ice_vfio_pci_open_device(struct vfio_device *core_vdev) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev, > + struct ice_vfio_pci_core_device, core_device.vdev); > + struct vfio_pci_core_device *vdev = &ice_vdev->core_device; > + int ret; > + > + ret = vfio_pci_core_enable(vdev); > + if (ret) > + return ret; > + > + ret = ice_vfio_migration_init(ice_vdev); > + if (ret) { > + vfio_pci_core_disable(vdev); > + return ret; > + } > + ice_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; > + vfio_pci_core_finish_enable(vdev); > + > + return 0; > +} > + > +/** > + * ice_vfio_pci_close_device - Called when a vfio device fd is closed > + * @core_vdev: the vfio device to close > + */ > +static void ice_vfio_pci_close_device(struct vfio_device *core_vdev) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev, > + struct ice_vfio_pci_core_device, core_device.vdev); > + > + ice_vfio_pci_disable_fds(ice_vdev); > + vfio_pci_core_close_device(core_vdev); > + ice_vfio_migration_uninit(ice_vdev); > +} > + > +/** > + * ice_vfio_pci_release_file - release ice vfio pci migration file > + * @inode: pointer to inode > + * @filp: pointer to the file to release > + * > + * Return 0 for success, negative for error > + */ > +static int ice_vfio_pci_release_file(struct inode *inode, struct file *filp) > +{ > + struct ice_vfio_pci_migration_file *migf = filp->private_data; > + > + ice_vfio_pci_disable_fd(migf); > + mutex_destroy(&migf->lock); > + kfree(migf); > + return 0; > +} > + > +/** > + * ice_vfio_pci_save_read - save migration file data to user space > + * @filp: pointer to migration file > + * @buf: pointer to user space buffer > + * @len: data length to be saved > + * @pos: should be 0 > + * > + * Return len of saved data, negative for error > + */ > +static ssize_t ice_vfio_pci_save_read(struct file *filp, char __user *buf, > + size_t len, loff_t *pos) > +{ > + struct ice_vfio_pci_migration_file *migf = filp->private_data; > + loff_t *off = &filp->f_pos; > + ssize_t done = 0; > + int ret; > + > + if (pos) > + return -ESPIPE; > + > + mutex_lock(&migf->lock); > + if (*off > migf->total_length) { > + done = -EINVAL; > + goto out_unlock; > + } > + > + if (migf->disabled) { > + done = -ENODEV; > + goto out_unlock; > + } > + > + len = min_t(size_t, migf->total_length - *off, len); > + if (len) { > + ret = copy_to_user(buf, migf->mig_data + *off, len); > + if (ret) { > + done = -EFAULT; > + goto out_unlock; > + } > + *off += len; > + done = len; > + } > +out_unlock: > + mutex_unlock(&migf->lock); > + return done; > +} > + > +static const struct file_operations ice_vfio_pci_save_fops = { > + .owner = THIS_MODULE, > + .read = ice_vfio_pci_save_read, > + .release = ice_vfio_pci_release_file, > + .llseek = no_llseek, > +}; > + > +/** > + * ice_vfio_pci_stop_copy - create migration file and save migration state to it > + * @ice_vdev: pointer to ice vfio pci core device structure > + * > + * Return migration file handler > + */ > +static struct ice_vfio_pci_migration_file * > +ice_vfio_pci_stop_copy(struct ice_vfio_pci_core_device *ice_vdev) > +{ > + struct ice_vfio_pci_migration_file *migf; > + int ret; > + > + migf = kzalloc(sizeof(*migf), GFP_KERNEL); > + if (!migf) > + return ERR_PTR(-ENOMEM); > + > + migf->filp = anon_inode_getfile("ice_vfio_pci_mig", > + &ice_vfio_pci_save_fops, migf, > + O_RDONLY); > + if (IS_ERR(migf->filp)) { > + int err = PTR_ERR(migf->filp); > + > + kfree(migf); > + return ERR_PTR(err); > + } > + > + stream_open(migf->filp->f_inode, migf->filp); > + mutex_init(&migf->lock); > + > + ret = ice_vfio_pci_save_state(ice_vdev, migf); > + if (ret) { > + fput(migf->filp); > + kfree(migf); > + return ERR_PTR(ret); > + } > + > + return migf; > +} > + > +/** > + * ice_vfio_pci_resume_write- copy migration file data from user space > + * @filp: pointer to migration file > + * @buf: pointer to user space buffer > + * @len: data length to be copied > + * @pos: should be 0 > + * > + * Return len of saved data, negative for error > + */ > +static ssize_t > +ice_vfio_pci_resume_write(struct file *filp, const char __user *buf, > + size_t len, loff_t *pos) > +{ > + struct ice_vfio_pci_migration_file *migf = filp->private_data; > + loff_t *off = &filp->f_pos; > + loff_t requested_length; > + ssize_t done = 0; > + int ret; > + > + if (pos) > + return -ESPIPE; > + > + if (*off < 0 || > + check_add_overflow((loff_t)len, *off, &requested_length)) > + return -EINVAL; > + > + if (requested_length > sizeof(migf->mig_data)) > + return -ENOMEM; > + > + mutex_lock(&migf->lock); > + if (migf->disabled) { > + done = -ENODEV; > + goto out_unlock; > + } > + > + ret = copy_from_user(migf->mig_data + *off, buf, len); > + if (ret) { > + done = -EFAULT; > + goto out_unlock; > + } > + *off += len; > + done = len; > + migf->total_length += len; > +out_unlock: > + mutex_unlock(&migf->lock); > + return done; > +} > + > +static const struct file_operations ice_vfio_pci_resume_fops = { > + .owner = THIS_MODULE, > + .write = ice_vfio_pci_resume_write, > + .release = ice_vfio_pci_release_file, > + .llseek = no_llseek, > +}; > + > +/** > + * ice_vfio_pci_resume - create resuming migration file > + * @ice_vdev: pointer to ice vfio pci core device structure > + * > + * Return migration file handler, negative value for failure > + */ > +static struct ice_vfio_pci_migration_file * > +ice_vfio_pci_resume(struct ice_vfio_pci_core_device *ice_vdev) > +{ > + struct ice_vfio_pci_migration_file *migf; > + > + migf = kzalloc(sizeof(*migf), GFP_KERNEL); > + if (!migf) > + return ERR_PTR(-ENOMEM); > + > + migf->filp = anon_inode_getfile("ice_vfio_pci_mig", > + &ice_vfio_pci_resume_fops, migf, > + O_WRONLY); > + if (IS_ERR(migf->filp)) { > + int err = PTR_ERR(migf->filp); > + > + kfree(migf); > + return ERR_PTR(err); > + } > + > + stream_open(migf->filp->f_inode, migf->filp); > + mutex_init(&migf->lock); > + return migf; > +} > + > +/** > + * ice_vfio_pci_step_device_state_locked - process device state change > + * @ice_vdev: pointer to ice vfio pci core device structure > + * @new: new device state > + * @final: final device state > + * > + * Return migration file handler or NULL for success, negative value for failure > + */ > +static struct file * > +ice_vfio_pci_step_device_state_locked(struct ice_vfio_pci_core_device *ice_vdev, > + u32 new, u32 final) > +{ > + u32 cur = ice_vdev->mig_state; > + int ret; > + > + if (cur == VFIO_DEVICE_STATE_RUNNING && > + new == VFIO_DEVICE_STATE_RUNNING_P2P) { > + ice_migration_suspend_dev(ice_vdev->pf, ice_vdev->vf_id); > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && > + new == VFIO_DEVICE_STATE_STOP) > + return NULL; This looks suspicious, are we actually able to freeze the internal device state? It should happen here. * RUNNING_P2P -> STOP * STOP_COPY -> STOP * While in STOP the device must stop the operation of the device. The device * must not generate interrupts, DMA, or any other change to external state. * It must not change its internal state. When stopped the device and kernel * migration driver must accept and respond to interaction to support external * subsystems in the STOP state, for example PCI MSI-X and PCI config space. * Failure by the user to restrict device access while in STOP must not result * in error conditions outside the user context (ex. host system faults). * * The STOP_COPY arc will terminate a data transfer session. > + > + if (cur == VFIO_DEVICE_STATE_STOP && > + new == VFIO_DEVICE_STATE_STOP_COPY) { > + struct ice_vfio_pci_migration_file *migf; > + > + migf = ice_vfio_pci_stop_copy(ice_vdev); > + if (IS_ERR(migf)) > + return ERR_CAST(migf); > + get_file(migf->filp); > + ice_vdev->saving_migf = migf; > + return migf->filp; > + } > + > + if (cur == VFIO_DEVICE_STATE_STOP_COPY && > + new == VFIO_DEVICE_STATE_STOP) { > + ice_vfio_pci_disable_fds(ice_vdev); > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_STOP && > + new == VFIO_DEVICE_STATE_RESUMING) { > + struct ice_vfio_pci_migration_file *migf; > + > + migf = ice_vfio_pci_resume(ice_vdev); > + if (IS_ERR(migf)) > + return ERR_CAST(migf); > + get_file(migf->filp); > + ice_vdev->resuming_migf = migf; > + return migf->filp; > + } > + > + if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) > + return NULL; * RESUMING -> STOP * Leaving RESUMING terminates a data transfer session and indicates the * device should complete processing of the data delivered by write(). The * kernel migration driver should complete the incorporation of data written * to the data transfer FD into the device internal state and perform * final validity and consistency checking of the new device state. If the * user provided data is found to be incomplete, inconsistent, or otherwise * invalid, the migration driver must fail the SET_STATE ioctl and * optionally go to the ERROR state as described below. > + > + if (cur == VFIO_DEVICE_STATE_STOP && > + new == VFIO_DEVICE_STATE_RUNNING_P2P) { > + ret = ice_vfio_pci_load_state(ice_vdev); > + if (ret) > + return ERR_PTR(ret); > + ice_vfio_pci_disable_fds(ice_vdev); STOP is not a state that should have active migration fds, RESUMING -> STOP above is, which is also where we'd expect to see the state loaded. This again makes it suspicious whether the device actually supports stopping and resuming internal state changes. > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && > + new == VFIO_DEVICE_STATE_RUNNING) > + return NULL; > + > + /* > + * vfio_mig_get_next_state() does not use arcs other than the above > + */ > + WARN_ON(true); > + return ERR_PTR(-EINVAL); > +} > + > +/** > + * ice_vfio_pci_set_device_state - Config device state > + * @vdev: pointer to vfio pci device > + * @new_state: device state > + * > + * Return 0 for success, negative value for failure. Inaccurate description of return value. > + */ > +static struct file * > +ice_vfio_pci_set_device_state(struct vfio_device *vdev, > + enum vfio_device_mig_state new_state) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = > + container_of(vdev, > + struct ice_vfio_pci_core_device, > + core_device.vdev); > + enum vfio_device_mig_state next_state; > + struct file *res = NULL; > + int ret; > + > + mutex_lock(&ice_vdev->state_mutex); > + while (new_state != ice_vdev->mig_state) { > + ret = vfio_mig_get_next_state(vdev, ice_vdev->mig_state, > + new_state, &next_state); > + if (ret) { > + res = ERR_PTR(ret); > + break; > + } > + res = ice_vfio_pci_step_device_state_locked(ice_vdev, > + next_state, > + new_state); > + if (IS_ERR(res)) > + break; > + ice_vdev->mig_state = next_state; > + if (WARN_ON(res && new_state != ice_vdev->mig_state)) { > + fput(res); > + res = ERR_PTR(-EINVAL); > + break; > + } > + } > + ice_vfio_pci_state_mutex_unlock(ice_vdev); > + return res; > +} > + > +/** > + * ice_vfio_pci_get_device_state - get device state > + * @vdev: pointer to vfio pci device > + * @curr_state: device state > + * > + * Return 0 for success > + */ > +static int ice_vfio_pci_get_device_state(struct vfio_device *vdev, > + enum vfio_device_mig_state *curr_state) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = > + container_of(vdev, > + struct ice_vfio_pci_core_device, > + core_device.vdev); Blank line after variable declaration. > + mutex_lock(&ice_vdev->state_mutex); > + *curr_state = ice_vdev->mig_state; > + ice_vfio_pci_state_mutex_unlock(ice_vdev); > + return 0; > +} > + > +/** > + * ice_vfio_pci_get_data_size - get migration data size > + * @vdev: pointer to vfio pci device > + * @stop_copy_length: migration data size > + * > + * Return 0 for success > + */ > +static int > +ice_vfio_pci_get_data_size(struct vfio_device *vdev, > + unsigned long *stop_copy_length) > +{ > + *stop_copy_length = SZ_128K; > + return 0; > +} > + > +static const struct vfio_migration_ops ice_vfio_pci_migrn_state_ops = { > + .migration_set_state = ice_vfio_pci_set_device_state, > + .migration_get_state = ice_vfio_pci_get_device_state, > + .migration_get_data_size = ice_vfio_pci_get_data_size, > +}; > + > +/** > + * ice_vfio_pci_core_init_dev - initialize vfio device > + * @core_vdev: pointer to vfio device > + * > + * Return 0 for success > + */ > +static int ice_vfio_pci_core_init_dev(struct vfio_device *core_vdev) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = container_of(core_vdev, > + struct ice_vfio_pci_core_device, core_device.vdev); > + > + mutex_init(&ice_vdev->state_mutex); > + spin_lock_init(&ice_vdev->reset_lock); > + > + core_vdev->migration_flags = > + VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; > + core_vdev->mig_ops = &ice_vfio_pci_migrn_state_ops; > + > + return vfio_pci_core_init_dev(core_vdev); > +} > + > +static const struct vfio_device_ops ice_vfio_pci_ops = { > + .name = "ice-vfio-pci", > + .init = ice_vfio_pci_core_init_dev, > + .release = vfio_pci_core_release_dev, Looks like the release callback should at least cleanup the locks for lockdep rather than use the core function directly. > + .open_device = ice_vfio_pci_open_device, > + .close_device = ice_vfio_pci_close_device, > + .device_feature = vfio_pci_core_ioctl_feature, > + .read = vfio_pci_core_read, > + .write = vfio_pci_core_write, > + .ioctl = vfio_pci_core_ioctl, > + .mmap = vfio_pci_core_mmap, > + .request = vfio_pci_core_request, > + .match = vfio_pci_core_match, > + .bind_iommufd = vfio_iommufd_physical_bind, > + .unbind_iommufd = vfio_iommufd_physical_unbind, > + .attach_ioas = vfio_iommufd_physical_attach_ioas, > + .detach_ioas = vfio_iommufd_physical_detach_ioas, > +}; > + > +/** > + * ice_vfio_pci_probe - Device initialization routine > + * @pdev: PCI device information struct > + * @id: entry in ice_vfio_pci_table > + * > + * Returns 0 on success, negative on failure > + */ > +static int > +ice_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > +{ > + struct ice_vfio_pci_core_device *ice_vdev; > + int ret; > + > + ice_vdev = vfio_alloc_device(ice_vfio_pci_core_device, core_device.vdev, > + &pdev->dev, &ice_vfio_pci_ops); > + if (!ice_vdev) Needs to test IS_ERR(ice_vdev). Thanks, Alex > + return -ENOMEM; > + > + dev_set_drvdata(&pdev->dev, &ice_vdev->core_device); > + > + ret = vfio_pci_core_register_device(&ice_vdev->core_device); > + if (ret) > + goto out_free; > + > + return 0; > + > +out_free: > + vfio_put_device(&ice_vdev->core_device.vdev); > + return ret; > +} > + > +/** > + * ice_vfio_pci_remove - Device removal routine > + * @pdev: PCI device information struct > + */ > +static void ice_vfio_pci_remove(struct pci_dev *pdev) > +{ > + struct ice_vfio_pci_core_device *ice_vdev = > + (struct ice_vfio_pci_core_device *)dev_get_drvdata(&pdev->dev); > + > + vfio_pci_core_unregister_device(&ice_vdev->core_device); > + vfio_put_device(&ice_vdev->core_device.vdev); > +} > + > +/* ice_pci_tbl - PCI Device ID Table > + * > + * Wildcard entries (PCI_ANY_ID) should come last > + * Last entry must be all 0s > + * > + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, > + * Class, Class Mask, private data (not used) } > + */ > +static const struct pci_device_id ice_vfio_pci_table[] = { > + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x1889) }, > + {} > +}; > +MODULE_DEVICE_TABLE(pci, ice_vfio_pci_table); > + > +static const struct pci_error_handlers ice_vfio_pci_core_err_handlers = { > + .reset_done = ice_vfio_pci_reset_done, > + .error_detected = vfio_pci_core_aer_err_detected, > +}; > + > +static struct pci_driver ice_vfio_pci_driver = { > + .name = "ice-vfio-pci", > + .id_table = ice_vfio_pci_table, > + .probe = ice_vfio_pci_probe, > + .remove = ice_vfio_pci_remove, > + .err_handler = &ice_vfio_pci_core_err_handlers, > + .driver_managed_dma = true, > +}; > + > +module_pci_driver(ice_vfio_pci_driver); > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Intel Corporation, <linux.nics@xxxxxxxxx>"); > +MODULE_DESCRIPTION(DRIVER_DESC);