Let firmware knows upon leaving PRE_COPY back to RUNNING as of some error in the target/migration cancellation. This will let firmware cleaning its internal resources that were turned on upon PRE_COPY. The flow is based on the device specification in this area. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- drivers/vfio/pci/mlx5/cmd.c | 14 +++++++++---- drivers/vfio/pci/mlx5/cmd.h | 4 +++- drivers/vfio/pci/mlx5/main.c | 39 +++++++++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 65135c614b5e..320751dac377 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); if (ret) return ret; - if (mvdev->saving_migf->state == - MLX5_MIGF_STATE_PRE_COPY_ERROR) { + /* Upon cleanup, ignore previous pre_copy error state */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR && + !(query_flags & MLX5VF_QUERY_CLEANUP)) { /* * In case we had a PRE_COPY error, only query full * image for final image @@ -200,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) /* Must be done outside the lock to let it progress */ set_tracker_error(mvdev); mutex_lock(&mvdev->state_mutex); - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); _mlx5vf_free_page_tracker_resources(mvdev); mlx5vf_state_mutex_unlock(mvdev); } @@ -639,6 +640,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5vf_async_data *async_data; + bool pre_copy_cleanup = false; int err; lockdep_assert_held(&mvdev->state_mutex); @@ -649,6 +651,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (err) return err; + if ((migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc) + pre_copy_cleanup = true; + if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) /* * In case we had a PRE_COPY error, SAVE is triggered only for @@ -667,7 +673,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, async_data = &migf->async_data; async_data->buf = buf; - async_data->stop_copy_chunk = !track; + async_data->stop_copy_chunk = (!track && !pre_copy_cleanup); async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 09d0ed6e2345..116ed9326421 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -197,6 +197,7 @@ struct mlx5vf_pci_core_device { enum { MLX5VF_QUERY_INC = (1UL << 0), MLX5VF_QUERY_FINAL = (1UL << 1), + MLX5VF_QUERY_CLEANUP = (1UL << 2), }; int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); @@ -232,7 +233,8 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); -void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, + enum mlx5_vf_migf_state *last_save_state); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf, u8 chunk_num, size_t next_required_umem_size); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fe09a8c8af95..3982fcf60cf2 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -1146,7 +1146,8 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) return ERR_PTR(ret); } -void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, + enum mlx5_vf_migf_state *last_save_state) { if (mvdev->resuming_migf) { mlx5vf_disable_fd(mvdev->resuming_migf); @@ -1157,6 +1158,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) if (mvdev->saving_migf) { mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); cancel_work_sync(&mvdev->saving_migf->async_data.work); + if (last_save_state) + *last_save_state = mvdev->saving_migf->state; mlx5vf_disable_fd(mvdev->saving_migf); wake_up_interruptible(&mvdev->saving_migf->poll_wait); mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf); @@ -1217,12 +1220,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return migf->filp; } - if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || - (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) { + mlx5vf_disable_fds(mvdev, NULL); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { - mlx5vf_disable_fds(mvdev); - return NULL; + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; + struct mlx5_vhca_data_buffer *buf; + enum mlx5_vf_migf_state state; + size_t size; + + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL, + MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP); + if (ret) + return ERR_PTR(ret); + buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE); + if (IS_ERR(buf)) + return ERR_CAST(buf); + /* pre_copy cleanup */ + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, false); + if (ret) { + mlx5vf_put_data_buffer(buf); + return ERR_PTR(ret); + } + mlx5vf_disable_fds(mvdev, &state); + return (state != MLX5_MIGF_STATE_ERROR) ? NULL : ERR_PTR(-EIO); } if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { @@ -1244,7 +1269,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, if (ret) return ERR_PTR(ret); } - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); return NULL; } @@ -1289,7 +1314,7 @@ void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) mvdev->deferred_reset = false; spin_unlock(&mvdev->reset_lock); mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); goto again; } mutex_unlock(&mvdev->state_mutex); -- 2.18.1