On Mon, 11 Mar 2024 11:40:11 +0200
Yishai Hadas <yishaih@xxxxxxxxxx> wrote:

> On 06/03/2024 12:56, Yishai Hadas wrote:
> > Enable live migration only once the firmware supports PRE_COPY.
> >
> > PRE_COPY has been supported by the firmware for a long time already [1]
> > and is required to achieve low downtime during live migration.
> >
> > This lets us clean up some old code that is no longer applicable
> > now that PRE_COPY is fully supported by the firmware.
> >
> > [1] The minimum firmware version that supports PRE_COPY is 28.36.1010,
> > it was released on January 23.

"on January 23" reads to me as "on January 23rd", but I find the release
notes [1] are dated February 02, 2023, so I think the intent is "in
January 2023".  I'll apply with that change.  Thanks,

Alex

[1] https://docs.nvidia.com/networking/display/connectx7firmwarev28361010

> >
> > No firmware without PRE_COPY support was ever made available to users.
> >
> > Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx>
>
> Hi Alex,
>
> Are we OK here?
>
> It's a cleanup patch for code that is no longer applicable.
>
> Thanks,
> Yishai
>
> > ---
> >  drivers/vfio/pci/mlx5/cmd.c  |  83 +++++++++++++++++++-------
> >  drivers/vfio/pci/mlx5/cmd.h  |   6 --
> >  drivers/vfio/pci/mlx5/main.c | 109 +++--------------------------------
> >  3 files changed, 71 insertions(+), 127 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
> > index c54bcd5d0917..41a4b0cf4297 100644
> > --- a/drivers/vfio/pci/mlx5/cmd.c
> > +++ b/drivers/vfio/pci/mlx5/cmd.c
> > @@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
> >  	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
> >  		goto end;
> >
> > +	if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
> > +	      MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
> > +		goto end;
> > +
> >  	mvdev->vf_id = pci_iov_vf_id(pdev);
> >  	if (mvdev->vf_id < 0)
> >  		goto end;
> > @@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
> >  	mvdev->migrate_cap = 1;
> >  	mvdev->core_device.vdev.migration_flags =
> >  		VFIO_MIGRATION_STOP_COPY |
> > -		VFIO_MIGRATION_P2P;
> > +		VFIO_MIGRATION_P2P |
> > +		VFIO_MIGRATION_PRE_COPY;
> > +
> >  	mvdev->core_device.vdev.mig_ops = mig_ops;
> >  	init_completion(&mvdev->tracker_comp);
> >  	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
> >  		mvdev->core_device.vdev.log_ops = log_ops;
> >
> > -	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
> > -	    MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
> > -		mvdev->core_device.vdev.migration_flags |=
> > -			VFIO_MIGRATION_PRE_COPY;
> > -
> >  	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
> >  		mvdev->chunk_mode = 1;
> >
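
[Aside: the VFIO_MIGRATION_PRE_COPY bit set above is exactly what userspace
sees through the migration feature query.  A minimal sketch of how a VMM
could probe it, assuming only an already-open VFIO device fd; 'device_fd'
and 'mig_flags' are illustrative names, not from this patch:

  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <linux/vfio.h>

  /* Return 0 and fill *flags on success; the caller then tests
   * VFIO_MIGRATION_PRE_COPY in *flags. */
  static int mig_flags(int device_fd, __u64 *flags)
  {
  	struct vfio_device_feature *feature;
  	struct vfio_device_feature_migration *mig;
  	size_t sz = sizeof(*feature) + sizeof(*mig);
  	int ret;

  	feature = calloc(1, sz);
  	if (!feature)
  		return -1;
  	feature->argsz = sz;
  	feature->flags = VFIO_DEVICE_FEATURE_GET |
  			 VFIO_DEVICE_FEATURE_MIGRATION;
  	ret = ioctl(device_fd, VFIO_DEVICE_FEATURE, feature);
  	if (!ret) {
  		mig = (struct vfio_device_feature_migration *)feature->data;
  		*flags = mig->flags;
  	}
  	free(feature);
  	return ret;
  }

After this patch a mlx5 VF either reports all three of STOP_COPY | P2P |
PRE_COPY or is not migratable at all.]
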
> > @@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
> >  	kfree(buf);
> >  }
> >
> > +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
> > +				      unsigned int npages)
> > +{
> > +	unsigned int to_alloc = npages;
> > +	struct page **page_list;
> > +	unsigned long filled;
> > +	unsigned int to_fill;
> > +	int ret;
> > +
> > +	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
> > +	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
> > +	if (!page_list)
> > +		return -ENOMEM;
> > +
> > +	do {
> > +		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
> > +						page_list);
> > +		if (!filled) {
> > +			ret = -ENOMEM;
> > +			goto err;
> > +		}
> > +		to_alloc -= filled;
> > +		ret = sg_alloc_append_table_from_pages(
> > +			&buf->table, page_list, filled, 0,
> > +			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
> > +			GFP_KERNEL_ACCOUNT);
> > +
> > +		if (ret)
> > +			goto err;
> > +		buf->allocated_length += filled * PAGE_SIZE;
> > +		/* clean input for another bulk allocation */
> > +		memset(page_list, 0, filled * sizeof(*page_list));
> > +		to_fill = min_t(unsigned int, to_alloc,
> > +				PAGE_SIZE / sizeof(*page_list));
> > +	} while (to_alloc > 0);
> > +
> > +	kvfree(page_list);
> > +	return 0;
> > +
> > +err:
> > +	kvfree(page_list);
> > +	return ret;
> > +}
> > +
> >  struct mlx5_vhca_data_buffer *
> >  mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
> >  			 size_t length,
> > @@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
> >  		goto err_out;
> >  	}
> >
> > -	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
> > -		if (async_data->stop_copy_chunk) {
> > -			u8 header_idx = buf->stop_copy_chunk_num ?
> > -				buf->stop_copy_chunk_num - 1 : 0;
> > +	if (async_data->stop_copy_chunk) {
> > +		u8 header_idx = buf->stop_copy_chunk_num ?
> > +			buf->stop_copy_chunk_num - 1 : 0;
> >
> > -			header_buf = migf->buf_header[header_idx];
> > -			migf->buf_header[header_idx] = NULL;
> > -		}
> > +		header_buf = migf->buf_header[header_idx];
> > +		migf->buf_header[header_idx] = NULL;
> > +	}
> >
> > -		if (!header_buf) {
> > -			header_buf = mlx5vf_get_data_buffer(migf,
> > -				sizeof(struct mlx5_vf_migration_header), DMA_NONE);
> > -			if (IS_ERR(header_buf)) {
> > -				err = PTR_ERR(header_buf);
> > -				goto err_free;
> > -			}
> > +	if (!header_buf) {
> > +		header_buf = mlx5vf_get_data_buffer(migf,
> > +			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
> > +		if (IS_ERR(header_buf)) {
> > +			err = PTR_ERR(header_buf);
> > +			goto err_free;
> >  		}
> >  	}
> >
> > diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
> > index 707393df36c4..df421dc6de04 100644
> > --- a/drivers/vfio/pci/mlx5/cmd.h
> > +++ b/drivers/vfio/pci/mlx5/cmd.h
> > @@ -13,9 +13,6 @@
> >  #include <linux/mlx5/cq.h>
> >  #include <linux/mlx5/qp.h>
> >
> > -#define MLX5VF_PRE_COPY_SUPP(mvdev) \
> > -	((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
> > -
> >  enum mlx5_vf_migf_state {
> >  	MLX5_MIGF_STATE_ERROR = 1,
> >  	MLX5_MIGF_STATE_PRE_COPY_ERROR,
> > @@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
> >  };
> >
> >  enum mlx5_vf_load_state {
> > -	MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
> >  	MLX5_VF_LOAD_STATE_READ_HEADER,
> >  	MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
> >  	MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
> > @@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
> >  mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
> >  		       size_t length, enum dma_data_direction dma_dir);
> >  void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
> > -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
> > -			       unsigned int npages);
> >  struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
> >  				       unsigned long offset);
> >  void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
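
[Aside: the mlx5vf_add_migration_pages() loop moved into cmd.c above is the
usual "bulk allocate, consume whatever was produced, retry the remainder"
pattern -- alloc_pages_bulk_array() may fill fewer slots than requested.  A
userspace analogue, illustration only with made-up helper names, not driver
code:

  #include <stdlib.h>
  #include <string.h>

  #define BATCH 512	/* stands in for PAGE_SIZE / sizeof(struct page *) */

  /* Pretend bulk allocator: may legitimately fill fewer slots than asked. */
  static size_t bulk_alloc(void **list, size_t want)
  {
  	size_t i;

  	for (i = 0; i < want; i++) {
  		list[i] = malloc(4096);
  		if (!list[i])
  			break;
  	}
  	return i;
  }

  /* 'table' must have room for npages entries; it plays the sg table. */
  static int add_pages(void **table, size_t npages)
  {
  	void *list[BATCH] = {};
  	size_t done = 0;

  	while (done < npages) {
  		size_t want = npages - done;
  		size_t filled;

  		if (want > BATCH)
  			want = BATCH;
  		filled = bulk_alloc(list, want);
  		if (!filled)
  			return -1;	/* -ENOMEM in the driver */
  		/* hand the filled entries off; ownership leaves 'list' */
  		memcpy(&table[done], list, filled * sizeof(*list));
  		done += filled;
  		/* clean input for another bulk round, as the driver does */
  		memset(list, 0, filled * sizeof(*list));
  	}
  	return 0;
  }
]
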
> > diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
> > index 3982fcf60cf2..61d9b0f9146d 100644
> > --- a/drivers/vfio/pci/mlx5/main.c
> > +++ b/drivers/vfio/pci/mlx5/main.c
> > @@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
> >  	return NULL;
> >  }
> >
> > -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
> > -			       unsigned int npages)
> > -{
> > -	unsigned int to_alloc = npages;
> > -	struct page **page_list;
> > -	unsigned long filled;
> > -	unsigned int to_fill;
> > -	int ret;
> > -
> > -	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
> > -	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
> > -	if (!page_list)
> > -		return -ENOMEM;
> > -
> > -	do {
> > -		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
> > -						page_list);
> > -		if (!filled) {
> > -			ret = -ENOMEM;
> > -			goto err;
> > -		}
> > -		to_alloc -= filled;
> > -		ret = sg_alloc_append_table_from_pages(
> > -			&buf->table, page_list, filled, 0,
> > -			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
> > -			GFP_KERNEL_ACCOUNT);
> > -
> > -		if (ret)
> > -			goto err;
> > -		buf->allocated_length += filled * PAGE_SIZE;
> > -		/* clean input for another bulk allocation */
> > -		memset(page_list, 0, filled * sizeof(*page_list));
> > -		to_fill = min_t(unsigned int, to_alloc,
> > -				PAGE_SIZE / sizeof(*page_list));
> > -	} while (to_alloc > 0);
> > -
> > -	kvfree(page_list);
> > -	return 0;
> > -
> > -err:
> > -	kvfree(page_list);
> > -	return ret;
> > -}
> > -
> >  static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
> >  {
> >  	mutex_lock(&migf->lock);
> > @@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
> >  	return 0;
> >  }
> >
> > -static int
> > -mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
> > -				   loff_t requested_length,
> > -				   const char __user **buf, size_t *len,
> > -				   loff_t *pos, ssize_t *done)
> > -{
> > -	int ret;
> > -
> > -	if (requested_length > MAX_LOAD_SIZE)
> > -		return -ENOMEM;
> > -
> > -	if (vhca_buf->allocated_length < requested_length) {
> > -		ret = mlx5vf_add_migration_pages(
> > -			vhca_buf,
> > -			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
> > -				     PAGE_SIZE));
> > -		if (ret)
> > -			return ret;
> > -	}
> > -
> > -	while (*len) {
> > -		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
> > -						    done);
> > -		if (ret)
> > -			return ret;
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> >  static ssize_t
> >  mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
> >  			 struct mlx5_vhca_data_buffer *vhca_buf,
> > @@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
> >  		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
> >  		break;
> >  	}
> > -	case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
> > -		ret = mlx5vf_resume_read_image_no_header(vhca_buf,
> > -				requested_length,
> > -				&buf, &len, pos, &done);
> > -		if (ret)
> > -			goto out_unlock;
> > -		break;
> >  	case MLX5_VF_LOAD_STATE_READ_IMAGE:
> >  		ret = mlx5vf_resume_read_image(migf, vhca_buf,
> >  				migf->record_size,
> > @@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
> >  	}
> >
> >  	migf->buf[0] = buf;
> > -	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
> > -		buf = mlx5vf_alloc_data_buffer(migf,
> > -			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
> > -		if (IS_ERR(buf)) {
> > -			ret = PTR_ERR(buf);
> > -			goto out_buf;
> > -		}
> > -
> > -		migf->buf_header[0] = buf;
> > -		migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
> > -	} else {
> > -		/* Initial state will be to read the image */
> > -		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
> > +	buf = mlx5vf_alloc_data_buffer(migf,
> > +		sizeof(struct mlx5_vf_migration_header), DMA_NONE);
> > +	if (IS_ERR(buf)) {
> > +		ret = PTR_ERR(buf);
> > +		goto out_buf;
> >  	}
> >
> > +	migf->buf_header[0] = buf;
> > +	migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
> > +
> >  	stream_open(migf->filp->f_inode, migf->filp);
> >  	mutex_init(&migf->lock);
> >  	INIT_LIST_HEAD(&migf->buf_list);
> > @@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
> >  	}
> >
> >  	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
> > -		if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
> > -			ret = mlx5vf_cmd_load_vhca_state(mvdev,
> > -							 mvdev->resuming_migf,
> > -							 mvdev->resuming_migf->buf[0]);
> > -			if (ret)
> > -				return ERR_PTR(ret);
> > -		}
> >  		mlx5vf_disable_fds(mvdev, NULL);
> >  		return NULL;
> >  	}
>
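
[Aside: with the no-header load path gone, the resume side always expects
the PRE_COPY-style stream -- header first, then image data -- and the save
side assumes userspace steps the device into PRE_COPY before STOP_COPY via
the MIG_DEVICE_STATE feature.  A rough sketch of that flow, using only
standard VFIO uAPI with error handling omitted; 'device_fd' and
'set_mig_state' are illustrative names:

  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <linux/vfio.h>

  /* Returns the migration data fd for transitions that create one
   * (PRE_COPY, STOP_COPY, RESUMING), or -1 otherwise. */
  static int set_mig_state(int device_fd, __u32 state)
  {
  	struct vfio_device_feature *feature;
  	struct vfio_device_feature_mig_state *mig;
  	size_t sz = sizeof(*feature) + sizeof(*mig);
  	int data_fd = -1;

  	feature = calloc(1, sz);
  	if (!feature)
  		return -1;
  	feature->argsz = sz;
  	feature->flags = VFIO_DEVICE_FEATURE_SET |
  			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
  	mig = (struct vfio_device_feature_mig_state *)feature->data;
  	mig->device_state = state;
  	if (!ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
  		data_fd = mig->data_fd;
  	free(feature);
  	return data_fd;
  }

Usage: read() the fd returned for VFIO_DEVICE_STATE_PRE_COPY while the guest
still runs, then move to VFIO_DEVICE_STATE_STOP_COPY and drain the rest.]
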