From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>

This function is intended to loop across each MTT chunk in the implicit
parent that intersects the range [io_virt, io_virt+bcnt). But it has a
confusing construction, so:

- Consistently use imr and odp_imr to refer to the implicit parent
  to avoid confusion with the normal mr and odp of the child
- Directly compute the inclusive start/end indexes by shifting. This
  makes the intent clearer and avoids any errors from unaligned values
  of addr
- Iterate directly over the range of MTT indexes, do not make a loop
  out of goto
- Follow 'success oriented flow', with goto error unwind
- Directly calculate the range of idx's that need update_xlt
- Ensure that any leaf MR added to the interval tree always results
  in an update to the XLT

Reviewed-by: Artemy Kovalyov <artemyko@xxxxxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
---
 drivers/infiniband/hw/mlx5/odp.c | 123 +++++++++++++++++--------------
 1 file changed, 69 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 3de30317891a23..a56c627e25ae58 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -479,78 +479,93 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
 	return ERR_PTR(err);
 }
 
-static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
-						u64 io_virt, size_t bcnt)
+static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
+						unsigned long idx)
 {
-	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
-	struct ib_umem_odp *odp, *result = NULL;
-	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
-	u64 addr = io_virt & MLX5_IMR_MTT_MASK;
-	int nentries = 0, start_idx = 0, ret;
+	struct ib_umem_odp *odp;
 	struct mlx5_ib_mr *mtt;
 
-	mutex_lock(&odp_mr->umem_mutex);
-	odp = odp_lookup(addr, 1, mr);
+	odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
+				      idx * MLX5_IMR_MTT_SIZE,
+				      MLX5_IMR_MTT_SIZE);
+	if (IS_ERR(odp))
+		return ERR_CAST(odp);
 
-	mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
-		    io_virt, bcnt, addr, odp);
+	mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags);
+	if (IS_ERR(mtt)) {
+		ib_umem_odp_release(odp);
+		return mtt;
+	}
 
-next_mr:
-	if (likely(odp)) {
-		if (nentries)
-			nentries++;
-	} else {
-		odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
-		if (IS_ERR(odp)) {
-			mutex_unlock(&odp_mr->umem_mutex);
-			return ERR_CAST(odp);
-		}
+	odp->private = mtt;
+	mtt->umem = &odp->umem;
+	mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
+	mtt->parent = imr;
+	INIT_WORK(&odp->work, mr_leaf_free_action);
 
-		mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
-					mr->access_flags);
-		if (IS_ERR(mtt)) {
-			mutex_unlock(&odp_mr->umem_mutex);
-			ib_umem_odp_release(odp);
-			return ERR_CAST(mtt);
-		}
+	xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
+		 &mtt->mmkey, GFP_ATOMIC);
+	return mtt;
+}
 
-		odp->private = mtt;
-		mtt->umem = &odp->umem;
-		mtt->mmkey.iova = addr;
-		mtt->parent = mr;
-		INIT_WORK(&odp->work, mr_leaf_free_action);
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr,
+						u64 io_virt, size_t bcnt)
+{
+	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+	unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+	unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;
+	unsigned long inv_start_idx = end_idx + 1;
+	unsigned long inv_len = 0;
+	struct ib_umem_odp *result = NULL;
+	struct ib_umem_odp *odp;
+	int ret;
 
-		xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
-			 &mtt->mmkey, GFP_ATOMIC);
+	mutex_lock(&odp_imr->umem_mutex);
+	odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr);
+	for (idx = idx; idx <= end_idx; idx++) {
+		if (unlikely(!odp)) {
+			struct mlx5_ib_mr *mtt;
 
-		if (!nentries)
-			start_idx = addr >> MLX5_IMR_MTT_SHIFT;
-		nentries++;
-	}
+			mtt = implicit_get_child_mr(imr, idx);
+			if (IS_ERR(mtt)) {
+				result = ERR_CAST(mtt);
+				goto out;
+			}
+			odp = to_ib_umem_odp(mtt->umem);
+			inv_start_idx = min(inv_start_idx, idx);
+			inv_len = idx - inv_start_idx + 1;
+		}
 
-	/* Return first odp if region not covered by single one */
-	if (likely(!result))
-		result = odp;
+		/* Return first odp if region not covered by single one */
+		if (likely(!result))
+			result = odp;
 
-	addr += MLX5_IMR_MTT_SIZE;
-	if (unlikely(addr < io_virt + bcnt)) {
 		odp = odp_next(odp);
-		if (odp && ib_umem_start(odp) != addr)
+		if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE)
 			odp = NULL;
-		goto next_mr;
 	}
 
-	if (unlikely(nentries)) {
-		ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
-					 MLX5_IB_UPD_XLT_INDIRECT |
+	/*
+	 * Any time the children in the interval tree are changed we must
+	 * perform an update of the xlt before exiting to ensure the HW and
+	 * the tree remains synchronized.
+	 */
+out:
+	if (likely(!inv_len))
+		goto out_unlock;
+
+	ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
+				 MLX5_IB_UPD_XLT_INDIRECT |
 				 MLX5_IB_UPD_XLT_ATOMIC);
-		if (ret) {
-			mlx5_ib_err(dev, "Failed to update PAS\n");
-			result = ERR_PTR(ret);
-		}
+	if (ret) {
+		mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
+			    "Failed to update PAS\n");
+		result = ERR_PTR(ret);
+		goto out_unlock;
 	}
-	mutex_unlock(&odp_mr->umem_mutex);
+out_unlock:
+	mutex_unlock(&odp_imr->umem_mutex);
 	return result;
 }
-- 
2.23.0
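
Editor's note, not part of the patch: a minimal standalone sketch of the inclusive
start/end index computation described in the changelog, to show why shifting both
ends avoids off-by-one errors with unaligned addresses. DEMO_MTT_SHIFT, DEMO_MTT_SIZE
and the sample values are assumed placeholders for this demo; the driver itself uses
MLX5_IMR_MTT_SHIFT / MLX5_IMR_MTT_SIZE.

/*
 * Standalone userspace sketch (assumed values, not driver code) of the
 * inclusive index computation used by the reworked implicit_mr_get_data().
 */
#include <stdio.h>

#define DEMO_MTT_SHIFT 21UL			/* assume 2 MiB chunks for the demo */
#define DEMO_MTT_SIZE  (1UL << DEMO_MTT_SHIFT)

int main(void)
{
	/* Deliberately unaligned start address, length spanning several chunks */
	unsigned long io_virt = 3 * DEMO_MTT_SIZE + 123;
	unsigned long bcnt = 2 * DEMO_MTT_SIZE;

	/* Inclusive first/last chunk indexes, computed purely by shifting */
	unsigned long idx = io_virt >> DEMO_MTT_SHIFT;
	unsigned long end_idx = (io_virt + bcnt - 1) >> DEMO_MTT_SHIFT;

	/* The rewritten loop simply walks idx..end_idx inclusive */
	printf("first idx %lu, last idx %lu, chunks touched %lu\n",
	       idx, end_idx, end_idx - idx + 1);
	return 0;
}

Because end_idx is computed from the last byte of the range (io_virt + bcnt - 1),
every chunk the range touches is covered exactly once regardless of how io_virt or
bcnt are aligned; the example prints indexes 3 through 5, i.e. three chunks.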