Add support for SAVING in chunk mode, it includes running a work that will fill the next chunk from the device. In case the number of available chunks will reach the MAX_NUM_CHUNKS, the next chunk SAVING will be delayed till the reader will consume one chunk. The next patch from the series will add the reader part of the chunk mode. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- drivers/vfio/pci/mlx5/cmd.c | 43 +++++++++++++++--- drivers/vfio/pci/mlx5/cmd.h | 12 ++++++ drivers/vfio/pci/mlx5/main.c | 84 +++++++++++++++++++++++++++++++----- 3 files changed, 122 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index b18735ee5d07..e68bf9ba5300 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -435,6 +435,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf) { spin_lock_irq(&buf->migf->list_lock); + buf->stop_copy_chunk_num = 0; list_add_tail(&buf->buf_elm, &buf->migf->avail_list); spin_unlock_irq(&buf->migf->list_lock); } @@ -551,6 +552,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) struct mlx5_vf_migration_file, async_data); if (!status) { + size_t next_required_umem_size = 0; + bool stop_copy_last_chunk; size_t image_size; unsigned long flags; bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY && @@ -558,6 +561,11 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) image_size = MLX5_GET(save_vhca_state_out, async_data->out, actual_image_size); + if (async_data->buf->stop_copy_chunk_num) + next_required_umem_size = MLX5_GET(save_vhca_state_out, + async_data->out, next_required_umem_size); + stop_copy_last_chunk = async_data->stop_copy_chunk && + !next_required_umem_size; if (async_data->header_buf) { status = add_buf_header(async_data->header_buf, image_size, initial_pre_copy); @@ -569,12 +577,28 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) migf->max_pos += async_data->buf->length; spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); + if (async_data->buf->stop_copy_chunk_num) { + migf->num_ready_chunks++; + if (next_required_umem_size && + migf->num_ready_chunks >= MAX_NUM_CHUNKS) { + /* Delay the next SAVE till one chunk be consumed */ + migf->next_required_umem_size = next_required_umem_size; + next_required_umem_size = 0; + } + } spin_unlock_irqrestore(&migf->list_lock, flags); - if (initial_pre_copy) + if (initial_pre_copy) { migf->pre_copy_initial_bytes += image_size; - migf->state = async_data->stop_copy_chunk ? - MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY; + migf->state = MLX5_MIGF_STATE_PRE_COPY; + } + if (stop_copy_last_chunk) + migf->state = MLX5_MIGF_STATE_COMPLETE; wake_up_interruptible(&migf->poll_wait); + if (next_required_umem_size) + mlx5vf_mig_file_set_save_work(migf, + /* Picking up the next chunk num */ + (async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1, + next_required_umem_size); mlx5vf_save_callback_complete(migf, async_data); return; } @@ -632,10 +656,15 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, } if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - if (async_data->stop_copy_chunk && migf->buf_header[0]) { - header_buf = migf->buf_header[0]; - migf->buf_header[0] = NULL; - } else { + if (async_data->stop_copy_chunk) { + u8 header_idx = buf->stop_copy_chunk_num ? + buf->stop_copy_chunk_num - 1 : 0; + + header_buf = migf->buf_header[header_idx]; + migf->buf_header[header_idx] = NULL; + } + + if (!header_buf) { header_buf = mlx5vf_get_data_buffer(migf, sizeof(struct mlx5_vf_migration_header), DMA_NONE); if (IS_ERR(header_buf)) { diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 6d8d52804c83..f2c7227fa683 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -83,6 +83,13 @@ struct mlx5vf_async_data { void *out; }; +struct mlx5vf_save_work_data { + struct mlx5_vf_migration_file *migf; + size_t next_required_umem_size; + struct work_struct work; + u8 chunk_num; +}; + #define MAX_NUM_CHUNKS 2 struct mlx5_vf_migration_file { @@ -97,9 +104,12 @@ struct mlx5_vf_migration_file { u32 record_tag; u64 stop_copy_prep_size; u64 pre_copy_initial_bytes; + size_t next_required_umem_size; + u8 num_ready_chunks; /* Upon chunk mode preserve another set of buffers for stop_copy phase */ struct mlx5_vhca_data_buffer *buf[MAX_NUM_CHUNKS]; struct mlx5_vhca_data_buffer *buf_header[MAX_NUM_CHUNKS]; + struct mlx5vf_save_work_data save_data[MAX_NUM_CHUNKS]; spinlock_t list_lock; struct list_head buf_list; struct list_head avail_list; @@ -223,6 +233,8 @@ struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); +void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf, + u8 chunk_num, size_t next_required_umem_size); int mlx5vf_start_page_tracker(struct vfio_device *vdev, struct rb_root_cached *ranges, u32 nnodes, u64 *page_size); int mlx5vf_stop_page_tracker(struct vfio_device *vdev); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 351b61303b72..c80caf55499f 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -306,6 +306,73 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf) wake_up_interruptible(&migf->poll_wait); } +void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf, + u8 chunk_num, size_t next_required_umem_size) +{ + migf->save_data[chunk_num - 1].next_required_umem_size = + next_required_umem_size; + migf->save_data[chunk_num - 1].migf = migf; + get_file(migf->filp); + queue_work(migf->mvdev->cb_wq, + &migf->save_data[chunk_num - 1].work); +} + +static struct mlx5_vhca_data_buffer * +mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf, + u8 index, size_t required_length) +{ + struct mlx5_vhca_data_buffer *buf = migf->buf[index]; + u8 chunk_num; + + WARN_ON(!buf); + chunk_num = buf->stop_copy_chunk_num; + buf->migf->buf[index] = NULL; + /* Checking whether the pre-allocated buffer can fit */ + if (buf->allocated_length >= required_length) + return buf; + + mlx5vf_put_data_buffer(buf); + buf = mlx5vf_get_data_buffer(buf->migf, required_length, + DMA_FROM_DEVICE); + if (IS_ERR(buf)) + return buf; + + buf->stop_copy_chunk_num = chunk_num; + return buf; +} + +static void mlx5vf_mig_file_save_work(struct work_struct *_work) +{ + struct mlx5vf_save_work_data *save_data = container_of(_work, + struct mlx5vf_save_work_data, work); + struct mlx5_vf_migration_file *migf = save_data->migf; + struct mlx5vf_pci_core_device *mvdev = migf->mvdev; + struct mlx5_vhca_data_buffer *buf; + + mutex_lock(&mvdev->state_mutex); + if (migf->state == MLX5_MIGF_STATE_ERROR) + goto end; + + buf = mlx5vf_mig_file_get_stop_copy_buf(migf, + save_data->chunk_num - 1, + save_data->next_required_umem_size); + if (IS_ERR(buf)) + goto err; + + if (mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false)) + goto err_save; + + goto end; + +err_save: + mlx5vf_put_data_buffer(buf); +err: + mlx5vf_mark_err(migf); +end: + mlx5vf_state_mutex_unlock(mvdev); + fput(migf->filp); +} + static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf, bool track) { @@ -400,6 +467,9 @@ static int mlx5vf_prep_stop_copy(struct mlx5vf_pci_core_device *mvdev, if (mvdev->chunk_mode) { migf->buf[i]->stop_copy_chunk_num = i + 1; migf->buf_header[i]->stop_copy_chunk_num = i + 1; + INIT_WORK(&migf->save_data[i].work, + mlx5vf_mig_file_save_work); + migf->save_data[i].chunk_num = i + 1; } } @@ -548,16 +618,10 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) if (ret) goto err; - /* Checking whether we have a matching pre-allocated buffer that can fit */ - if (migf->buf[0]->allocated_length >= length) { - buf = migf->buf[0]; - migf->buf[0] = NULL; - } else { - buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - goto err; - } + buf = mlx5vf_mig_file_get_stop_copy_buf(migf, 0, length); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; } ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false); -- 2.18.1