Register its own handler for pci_error_handlers.reset_done and update state accordingly. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx> --- drivers/vfio/pci/mlx5/main.c | 56 ++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 467dee08ad77..a94cb9cdb82e 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -55,8 +55,11 @@ struct mlx5vf_pci_migration_info { struct mlx5vf_pci_core_device { struct vfio_pci_core_device core_device; u8 migrate_cap:1; + u8 deferred_reset:1; /* protect migration state */ struct mutex state_mutex; + /* protect the reset_done flow */ + spinlock_t reset_lock; struct mlx5vf_pci_migration_info vmig; }; @@ -473,6 +476,49 @@ mlx5vf_pci_migration_data_rw(struct mlx5vf_pci_core_device *mvdev, return count; } +/* + * This function is called in all state_mutex unlock cases to + * handle a 'deferred_reset' if exists. + */ +static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) +{ +again: + spin_lock(&mvdev->reset_lock); + if (mvdev->deferred_reset) { + mvdev->deferred_reset = false; + spin_unlock(&mvdev->reset_lock); + mlx5vf_reset_mig_state(mvdev); + mvdev->vmig.vfio_dev_state = VFIO_DEVICE_STATE_RUNNING; + goto again; + } + mutex_unlock(&mvdev->state_mutex); + spin_unlock(&mvdev->reset_lock); +} + +static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); + + if (!mvdev->migrate_cap) + return; + + /* + * As the higher VFIO layers are holding locks across reset and using + * those same locks with the mm_lock we need to prevent ABBA deadlock + * with the state_mutex and mm_lock. + * In case the state_mutex was taken already we defer the cleanup work + * to the unlock flow of the other running context. + */ + spin_lock(&mvdev->reset_lock); + mvdev->deferred_reset = true; + if (!mutex_trylock(&mvdev->state_mutex)) { + spin_unlock(&mvdev->reset_lock); + return; + } + spin_unlock(&mvdev->reset_lock); + mlx5vf_state_mutex_unlock(mvdev); +} + static ssize_t mlx5vf_pci_mig_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -541,7 +587,7 @@ static ssize_t mlx5vf_pci_mig_rw(struct vfio_pci_core_device *vdev, } end: - mutex_unlock(&mvdev->state_mutex); + mlx5vf_state_mutex_unlock(mvdev); return ret; } @@ -636,6 +682,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, if (MLX5_CAP_GEN(mdev, migration)) { mvdev->migrate_cap = 1; mutex_init(&mvdev->state_mutex); + spin_lock_init(&mvdev->reset_lock); } mlx5_vf_put_core_dev(mdev); } @@ -670,12 +717,17 @@ static const struct pci_device_id mlx5vf_pci_table[] = { MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table); +const struct pci_error_handlers mlx5vf_err_handlers = { + .reset_done = mlx5vf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + static struct pci_driver mlx5vf_pci_driver = { .name = KBUILD_MODNAME, .id_table = mlx5vf_pci_table, .probe = mlx5vf_pci_probe, .remove = mlx5vf_pci_remove, - .err_handler = &vfio_pci_core_err_handlers, + .err_handler = &mlx5vf_err_handlers, }; static void __exit mlx5vf_pci_cleanup(void) -- 2.18.1