On 09/11/2022 20:04, Jason Gunthorpe wrote:
On Sun, Nov 06, 2022 at 07:46:22PM +0200, Yishai Hadas wrote:
Enforce a single SAVE command at a time.
As the SAVE command is asynchronous, we must enforce that only a single
command runs at a time.
This preserves ordering between multiple calls and protects against races
on the migration file data structure.
This is a must for the next patches in the series, where, as part of
PRE_COPY, we may have multiple images to save and multiple SAVE commands
issued from different flows.
Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx>
---
drivers/vfio/pci/mlx5/cmd.c | 5 +++++
drivers/vfio/pci/mlx5/cmd.h | 2 ++
drivers/vfio/pci/mlx5/main.c | 1 +
3 files changed, 8 insertions(+)
This should just use a 'counting completion' instead of open coding
one.
Makes sense, I'll change it accordingly as part of V1.
Yishai
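
For reference, the "counting" usage here would mean seeding the completion
with a single complete() at init time: each wait_for_completion() then
consumes that token and each complete() returns it, which serializes the
SAVE flows. Roughly along these lines (the names below are illustrative
only, not taken from this patch):

#include <linux/completion.h>

/* One token in save_comp means "no SAVE command in flight". */
struct save_serializer {
	struct completion save_comp;
};

static void save_serializer_init(struct save_serializer *s)
{
	init_completion(&s->save_comp);
	complete(&s->save_comp);	/* seed a single token */
}

/* Called before issuing the asynchronous SAVE command. */
static int save_serializer_acquire(struct save_serializer *s)
{
	/* Blocks until the previous SAVE returned its token. */
	return wait_for_completion_interruptible(&s->save_comp);
}

/*
 * Called from the async completion callback, or from the error unwind
 * if the command was never issued, to return the token.
 */
static void save_serializer_release(struct save_serializer *s)
{
	complete(&s->save_comp);
}

The pending/complete accounting then lives entirely inside the completion,
with no flag or wait queue to keep in sync.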
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 0848bc905d3e..b9ed2f1c8689 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -281,6 +281,8 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
mlx5_core_dealloc_pd(mdev, async_data->pdn);
kvfree(async_data->out);
+ migf->save_cb_active = false;
+ wake_up(&migf->save_wait);
complete()
fput(migf->filp);
}
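
(For illustration, assuming a completion field named save_comp -- the name
is illustrative -- the callback tail could look roughly like:)

	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	mlx5_core_dealloc_pd(mdev, async_data->pdn);
	kvfree(async_data->out);
	/* Return the token so the next SAVE command may be issued. */
	complete(&migf->save_comp);
	fput(migf->filp);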
@@ -321,6 +323,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
return -ENOTCONN;
mdev = mvdev->mdev;
+ wait_event(migf->save_wait, !migf->save_cb_active);
wait_for_completion_interruptible()
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
return err;
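
(i.e. something along these lines, with the same illustrative save_comp
field; the interruptible wait can fail, so its return value needs handling:)

	mdev = mvdev->mdev;
	err = wait_for_completion_interruptible(&migf->save_comp);
	if (err)
		return err;	/* -ERESTARTSYS: no token was consumed */

Once the wait has succeeded, every later failure, including a failed
mlx5_core_alloc_pd(), would have to give the token back via complete()
before returning.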
@@ -353,6 +356,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
get_file(migf->filp);
async_data->mkey = mkey;
async_data->pdn = pdn;
+ migf->save_cb_active = true;
err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
async_data->out,
out_size, mlx5vf_save_callback,
@@ -371,6 +375,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
mlx5_core_dealloc_pd(mdev, pdn);
+ migf->save_cb_active = false;
complete()
return err;
}
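
(The migf->save_cb_active = true assignment above would then go away, since
the wait itself already took the token; the unwind just hands it back, e.g.:)

err_dma_map:
	mlx5_core_dealloc_pd(mdev, pdn);
	/* The callback will never run, so return the token here. */
	complete(&migf->save_comp);
	return err;
}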
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 921d5720a1e5..b1c5dd2ff144 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -26,6 +26,7 @@ struct mlx5_vf_migration_file {
struct mutex lock;
u8 disabled:1;
u8 is_err:1;
+ u8 save_cb_active:1;
struct sg_append_table table;
size_t total_length;
@@ -37,6 +38,7 @@ struct mlx5_vf_migration_file {
unsigned long last_offset;
struct mlx5vf_pci_core_device *mvdev;
wait_queue_head_t poll_wait;
+ wait_queue_head_t save_wait;
struct mlx5_async_ctx async_ctx;
struct mlx5vf_async_data async_data;
};
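
(So the new save_cb_active bit and save_wait queue would collapse into a
single field, e.g.:)

	wait_queue_head_t poll_wait;
	struct completion save_comp;	/* one token: no SAVE command in flight */
	struct mlx5_async_ctx async_ctx;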
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 4c7a39ffd247..5da278f3c31c 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -245,6 +245,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock);
init_waitqueue_head(&migf->poll_wait);
+ init_waitqueue_head(&migf->save_wait);
init_completion()
complete()
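
(i.e. seed the completion with one token when the migration file is
created, so the first SAVE can go ahead immediately; save_comp again being
the illustrative name:)

	init_waitqueue_head(&migf->poll_wait);
	init_completion(&migf->save_comp);
	/* Seed a single token: no SAVE command is in flight yet. */
	complete(&migf->save_comp);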
Jason