From: Shay Drory <shayd@xxxxxxxxxx> In order to support PRE_COPY, the mlx5 driver transfers multiple states (images) of the device. For example, the source VF can save and transfer multiple states, and the target VF will load them in that order. This patch implements the changes for the target VF to decompose the header for each state and to write and load multiple states. Signed-off-by: Shay Drory <shayd@xxxxxxxxxx> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- drivers/vfio/pci/mlx5/cmd.c | 12 ++--- drivers/vfio/pci/mlx5/cmd.h | 2 + drivers/vfio/pci/mlx5/main.c | 98 ++++++++++++++++++++++++++++++------ 3 files changed, 89 insertions(+), 23 deletions(-) diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 2d2171191218..a1b17cd688b9 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -420,16 +420,14 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (mvdev->mdev_detach) return -ENOTCONN; - mutex_lock(&migf->lock); - if (!migf->image_length) { - err = -EINVAL; - goto end; - } + lockdep_assert_held(&migf->lock); + if (!migf->image_length) + return -EINVAL; mdev = mvdev->mdev; err = mlx5_core_alloc_pd(mdev, &pdn); if (err) - goto end; + return err; err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); if (err) @@ -454,8 +452,6 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); err_reg: mlx5_core_dealloc_pd(mdev, pdn); -end: - mutex_unlock(&migf->lock); return err; } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 3b0411e4a74e..03f3b5e99879 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -39,6 +39,8 @@ struct mlx5_vf_migration_file { size_t table_start_pos; size_t image_length; size_t allocated_length; + size_t expected_length; + struct mlx5_vf_migration_header header; size_t sw_headers_bytes_sent; /* * The device can be moved to 
stop_copy before the previous state was diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index c0ee121bd5ea..6cdd4fc93818 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -569,12 +569,45 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) return ERR_PTR(ret); } +static void mlx5vf_recv_sw_header(struct mlx5_vf_migration_file *migf, + loff_t *pos, const char __user **buf, + size_t *len, ssize_t *done) +{ + ssize_t header_size = sizeof(migf->header); + void *header_buf = &migf->header; + size_t size_to_recv; + + size_to_recv = header_size - (migf->sw_headers_bytes_sent % header_size); + size_to_recv = min_t(size_t, size_to_recv, *len); + header_buf += header_size - size_to_recv; + if (copy_from_user(header_buf, *buf, size_to_recv)) { + *done = -EFAULT; + return; + } + + *pos += size_to_recv; + *len -= size_to_recv; + *done += size_to_recv; + *buf += size_to_recv; + migf->sw_headers_bytes_sent += size_to_recv; + migf->header_read = !(migf->sw_headers_bytes_sent % header_size); + + if (migf->sw_headers_bytes_sent % header_size) + return; + migf->expected_length = migf->header.image_size; +} + +#define EXPECTED_TABLE_END_POSITION(migf) \ + (migf->table_start_pos + migf->expected_length + \ + migf->sw_headers_bytes_sent) + static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; loff_t requested_length; ssize_t done = 0; + int ret = 0; if (pos) return -ESPIPE; @@ -584,33 +617,47 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, check_add_overflow((loff_t)len, *pos, &requested_length)) return -EINVAL; - if (requested_length > MAX_MIGRATION_SIZE) - return -ENOMEM; - + mutex_lock(&migf->mvdev->state_mutex); mutex_lock(&migf->lock); + requested_length -= migf->table_start_pos; + if (requested_length > MAX_MIGRATION_SIZE) { + ret = -ENOMEM; + goto out_unlock; 
+ } + if (migf->disabled) { - done = -ENODEV; + ret = -ENODEV; goto out_unlock; } +start_over: if (migf->allocated_length < requested_length) { - done = mlx5vf_add_migration_pages( + ret = mlx5vf_add_migration_pages( migf, DIV_ROUND_UP(requested_length - migf->allocated_length, PAGE_SIZE), &migf->table); - if (done) + if (ret) + goto out_unlock; + } + + if (VFIO_PRE_COPY_SUPP(migf->mvdev)) { + if (!migf->header_read) + mlx5vf_recv_sw_header(migf, pos, &buf, &len, &done); + if (done < 0) goto out_unlock; } while (len) { + unsigned long offset; size_t page_offset; struct page *page; size_t page_len; u8 *to_buff; - int ret; - page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(migf, *pos - page_offset, + offset = *pos - mlx5vf_get_table_start_pos(migf); + page_offset = offset % PAGE_SIZE; + offset -= page_offset; + page = mlx5vf_get_migration_page(migf, offset, &migf->table); if (!page) { if (done == 0) @@ -619,11 +666,15 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, } page_len = min_t(size_t, len, PAGE_SIZE - page_offset); + if (VFIO_PRE_COPY_SUPP(migf->mvdev)) + page_len = min_t(size_t, page_len, + EXPECTED_TABLE_END_POSITION(migf) - *pos); + to_buff = kmap_local_page(page); ret = copy_from_user(to_buff + page_offset, buf, page_len); kunmap_local(to_buff); if (ret) { - done = -EFAULT; + ret = -EFAULT; goto out_unlock; } *pos += page_len; @@ -631,10 +682,22 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, done += page_len; buf += page_len; migf->image_length += page_len; + + if (*pos == EXPECTED_TABLE_END_POSITION(migf)) { + ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf); + if (ret) + goto out_unlock; + mlx5vf_prep_next_table(migf); + if (len) { + requested_length -= migf->expected_length; + goto start_over; + } + } } out_unlock: mutex_unlock(&migf->lock); - return done; + mlx5vf_state_mutex_unlock(migf->mvdev); + return ret ? 
ret : done; } static const struct file_operations mlx5vf_resume_fops = { @@ -663,6 +726,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) } stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + migf->mvdev = mvdev; return migf; } @@ -754,10 +818,14 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf); - if (ret) - return ERR_PTR(ret); + if (!VFIO_PRE_COPY_SUPP(mvdev)) { + mutex_lock(&mvdev->resuming_migf->lock); + ret = mlx5vf_cmd_load_vhca_state(mvdev, + mvdev->resuming_migf); + mutex_unlock(&mvdev->resuming_migf->lock); + if (ret) + return ERR_PTR(ret); + } mlx5vf_disable_fds(mvdev); return NULL; } -- 2.18.1