From: Bodong Wang <bodong@xxxxxxxxxxxx> Enable RAW QP to be able to configure burst control by modify_qp. By using burst control with rate limiting, user can achieve best performance and accuracy. The burst control information is passed by user through udata. This patch also reports burst control capability for mlx5 related hardwares, burst control is only marked as supported when both packet_pacing_burst_bound and packet_pacing_typical_size are supported. Signed-off-by: Bodong Wang <bodong@xxxxxxxxxxxx> Reviewed-by: Daniel Jurgens <danielj@xxxxxxxxxxxx> Reviewed-by: Yishai Hadas <yishaih@xxxxxxxxxxxx> Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx> --- drivers/infiniband/hw/mlx5/main.c | 4 +++ drivers/infiniband/hw/mlx5/qp.c | 52 +++++++++++++++++++++++++++++++++++++-- include/uapi/rdma/mlx5-abi.h | 18 +++++++++++++- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 298a2a9ca9e3..cfe18c43b5e5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -980,6 +980,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_QOS(mdev, packet_pacing_min_rate); resp.packet_pacing_caps.supported_qpts |= 1 << IB_QPT_RAW_PACKET; + if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) && + MLX5_CAP_QOS(mdev, packet_pacing_typical_size)) + resp.packet_pacing_caps.cap_flags |= + MLX5_IB_PP_SUPPORT_BURST; } resp.response_length += sizeof(resp.packet_pacing_caps); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index edad8ac74a70..5bce1fa4060c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2911,7 +2911,9 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum ib_qp_state new_state) + enum ib_qp_state cur_state, + enum ib_qp_state new_state, + const struct mlx5_ib_modify_qp *ucmd) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { [MLX5_QP_STATE_RST] = { @@ -3150,6 +3152,29 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_RATE_LIMIT) { raw_qp_param.rl.rate = attr->rate_limit; + + if (ucmd->burst_info.max_burst_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) { + raw_qp_param.rl.max_burst_sz = + ucmd->burst_info.max_burst_sz; + } else { + err = -EINVAL; + goto out; + } + } + + if (ucmd->burst_info.typical_pkt_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) { + raw_qp_param.rl.typical_pkt_sz = + ucmd->burst_info.typical_pkt_sz; + } else { + err = -EINVAL; + goto out; + } + } + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; } @@ -3336,8 +3361,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; + size_t required_cmd_sz; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; @@ -3345,6 +3372,26 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->rwq_ind_tbl) return -ENOSYS; + if (udata && udata->inlen) { + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd)))) + return -EFAULT; + + if (ucmd.comp_mask || + memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved))) + return -EOPNOTSUPP; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3425,7 +3472,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, + new_state, &ucmd); out: mutex_unlock(&qp->mutex); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 1111aa4e7c1e..491659d5e4a5 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -163,6 +163,10 @@ struct mlx5_ib_cqe_comp_caps { __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */ }; +enum mlx5_ib_packet_pacing_cap_flags { + MLX5_IB_PP_SUPPORT_BURST = 1 << 0, +}; + struct mlx5_packet_pacing_caps { __u32 qp_rate_limit_min; __u32 qp_rate_limit_max; /* In kpbs */ @@ -172,7 +176,8 @@ struct mlx5_packet_pacing_caps { * supported_qpts |= 1 << IB_QPT_RAW_PACKET */ __u32 supported_qpts; - __u32 reserved; + __u8 cap_flags; /* enum mlx5_ib_packet_pacing_cap_flags */ + __u8 reserved[3]; }; enum mlx5_ib_mpw_caps { @@ -362,6 +367,17 @@ struct mlx5_ib_create_ah_resp { __u8 reserved[6]; }; +struct mlx5_ib_burst_info { + __u32 max_burst_sz; + __u16 typical_pkt_sz; +}; + +struct mlx5_ib_modify_qp { + __u32 comp_mask; + struct mlx5_ib_burst_info burst_info; + __u8 reserved[6]; +}; + struct mlx5_ib_modify_qp_resp { __u32 response_length; __u32 dctn; -- 2.14.3 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html