From: Parav Pandit <parav@xxxxxxxxxxxx>

This patch adds support for querying the congestion-related hardware
counters through a new command and links them with the other hw counters
available in the hw_counters sysfs location.

In order to reuse the existing infrastructure, it renames the q_counter
data structures to more generic counters, covering q_counters, congestion
counters and possibly other counters in the future.

New hardware counters:
 * rp_cnp_handled - CNP packets handled by the reaction point
 * rp_cnp_ignored - CNP packets ignored by the reaction point
 * np_cnp_sent - CNP packets sent by the notification point in response
   to CE marked RoCE packets
 * np_ecn_marked_roce_packets - CE marked RoCE packets received by the
   notification point

It also avoids returning ENOSYS, which is specific to an invalid system
call number, and thereby resolves the following checkpatch.pl warning:

WARNING: ENOSYS means 'invalid syscall nr' and nothing else
+		return -ENOSYS;

Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
Reviewed-by: Eli Cohen <eli@xxxxxxxxxxxx>
Reviewed-by: Daniel Jurgens <danielj@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
---
Changes from v0:
 * fix overflow issue in access to congestion counters
---
 drivers/infiniband/hw/mlx5/cmd.c     |  11 +++
 drivers/infiniband/hw/mlx5/cmd.h     |   2 +
 drivers/infiniband/hw/mlx5/main.c    | 172 ++++++++++++++++++++++++-----------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   7 +-
 drivers/infiniband/hw/mlx5/qp.c      |   7 +-
 include/linux/mlx5/mlx5_ifc.h        |  18 ++--
 6 files changed, 150 insertions(+), 67 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index cdc2d3017da7..18d5e1db93ed 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -46,3 +46,14 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
 			      null_mkey);
 	return err;
 }
+
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+				bool reset, void *out, int out_size)
+{
+	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+	MLX5_SET(query_cong_statistics_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+	MLX5_SET(query_cong_statistics_in, in, clear, reset);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 7ca8a7b6434d..fa09228193a6 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,4 +37,6 @@
 #include <linux/mlx5/driver.h>
 
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+				bool reset, void *out, int out_size);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4dc0a8785fe0..9e8ad11e354e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,6 +57,7 @@
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/vport.h>
 #include "mlx5_ib.h"
+#include "cmd.h"
 
 #define DRIVER_NAME "mlx5_ib"
 #define DRIVER_VERSION "2.2-1"
@@ -3133,7 +3134,7 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 	mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-struct mlx5_ib_q_counter {
+struct mlx5_ib_counter {
 	const char *name;
 	size_t offset;
 };
@@ -3141,18 +3142,18 @@ struct mlx5_ib_q_counter {
 #define INIT_Q_COUNTER(_name)		\
 	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
 
-static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+static const struct mlx5_ib_counter basic_q_cnts[] = {
 	INIT_Q_COUNTER(rx_write_requests),
 	INIT_Q_COUNTER(rx_read_requests),
 	INIT_Q_COUNTER(rx_atomic_requests),
 	INIT_Q_COUNTER(out_of_buffer),
 };
 
-static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
 	INIT_Q_COUNTER(out_of_sequence),
 };
 
-static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
 	INIT_Q_COUNTER(duplicate_request),
 	INIT_Q_COUNTER(rnr_nak_retry_err),
 	INIT_Q_COUNTER(packet_seq_err),
@@ -3160,22 +3161,31 @@ static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
 	INIT_Q_COUNTER(local_ack_timeout_err),
 };
 
-static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+#define INIT_CONG_COUNTER(_name)		\
+	{ .name = #_name, .offset =	\
+		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+	INIT_CONG_COUNTER(rp_cnp_ignored),
+	INIT_CONG_COUNTER(rp_cnp_handled),
+	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+	INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
 	unsigned int i;
 
 	for (i = 0; i < dev->num_ports; i++) {
 		mlx5_core_dealloc_q_counter(dev->mdev,
-					    dev->port[i].q_cnts.set_id);
-		kfree(dev->port[i].q_cnts.names);
-		kfree(dev->port[i].q_cnts.offsets);
+					    dev->port[i].cnts.set_id);
+		kfree(dev->port[i].cnts.names);
+		kfree(dev->port[i].cnts.offsets);
 	}
 }
 
-static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
-				      const char ***names,
-				      size_t **offsets,
-				      u32 *num)
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+				    struct mlx5_ib_counters *cnts)
 {
 	u32 num_counters;
 
@@ -3186,27 +3196,32 @@ static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
 		num_counters += ARRAY_SIZE(retrans_q_cnts);
+	cnts->num_q_counters = num_counters;
 
-	*names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
-	if (!*names)
+	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+		num_counters += ARRAY_SIZE(cong_cnts);
+	}
+
+	cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+	if (!cnts->names)
 		return -ENOMEM;
 
-	*offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
-	if (!*offsets)
+	cnts->offsets = kcalloc(num_counters,
+				sizeof(cnts->offsets), GFP_KERNEL);
+	if (!cnts->offsets)
 		goto err_names;
 
-	*num = num_counters;
-
 	return 0;
 
 err_names:
-	kfree(*names);
+	kfree(cnts->names);
 	return -ENOMEM;
 }
 
-static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
-				    const char **names,
-				    size_t *offsets)
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+				  const char **names,
+				  size_t *offsets)
 {
 	int i;
 	int j = 0;
@@ -3229,9 +3244,16 @@ static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
 			offsets[j] = retrans_q_cnts[i].offset;
 		}
 	}
+
+	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+			names[j] = cong_cnts[i].name;
+			offsets[j] = cong_cnts[i].offset;
+		}
+	}
 }
 
-static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 {
 	int i;
 	int ret;
@@ -3240,7 +3262,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 		struct mlx5_ib_port *port = &dev->port[i];
 
 		ret = mlx5_core_alloc_q_counter(dev->mdev,
-						&port->q_cnts.set_id);
+						&port->cnts.set_id);
 		if (ret) {
 			mlx5_ib_warn(dev,
 				     "couldn't allocate queue counter for port %d, err %d\n",
@@ -3248,15 +3270,12 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 			goto dealloc_counters;
 		}
 
-		ret = __mlx5_ib_alloc_q_counters(dev,
-						 &port->q_cnts.names,
-						 &port->q_cnts.offsets,
-						 &port->q_cnts.num_counters);
+		ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
 		if (ret)
 			goto dealloc_counters;
 
-		mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
-					port->q_cnts.offsets);
+		mlx5_ib_fill_counters(dev, port->cnts.names,
+				      port->cnts.offsets);
 	}
 
 	return 0;
@@ -3264,7 +3283,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 dealloc_counters:
 	while (--i >= 0)
 		mlx5_core_dealloc_q_counter(dev->mdev,
-					    dev->port[i].q_cnts.set_id);
+					    dev->port[i].cnts.set_id);
 
 	return ret;
 }
@@ -3279,44 +3298,93 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
 	if (port_num == 0)
 		return NULL;
 
-	return rdma_alloc_hw_stats_struct(port->q_cnts.names,
-					  port->q_cnts.num_counters,
+	return rdma_alloc_hw_stats_struct(port->cnts.names,
+					  port->cnts.num_q_counters +
+					  port->cnts.num_cong_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
-				struct rdma_hw_stats *stats,
-				u8 port_num, int index)
+static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
+				    struct mlx5_ib_port *port,
+				    struct rdma_hw_stats *stats)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	struct mlx5_ib_port *port = &dev->port[port_num - 1];
 	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
 	void *out;
 	__be32 val;
-	int ret;
-	int i;
-
-	if (!stats)
-		return -ENOSYS;
+	int ret, i;
 
 	out = mlx5_vzalloc(outlen);
 	if (!out)
 		return -ENOMEM;
 
 	ret = mlx5_core_query_q_counter(dev->mdev,
-					port->q_cnts.set_id, 0,
+					port->cnts.set_id, 0,
 					out, outlen);
 	if (ret)
 		goto free;
 
-	for (i = 0; i < port->q_cnts.num_counters; i++) {
-		val = *(__be32 *)(out + port->q_cnts.offsets[i]);
+	for (i = 0; i < port->cnts.num_q_counters; i++) {
+		val = *(__be32 *)(out + port->cnts.offsets[i]);
 		stats->value[i] = (u64)be32_to_cpu(val);
 	}
 
 free:
 	kvfree(out);
-	return port->q_cnts.num_counters;
+	return ret;
+}
+
+static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
+				       struct mlx5_ib_port *port,
+				       struct rdma_hw_stats *stats)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+	void *out;
+	int ret, i;
+	int offset = port->cnts.num_q_counters;
+
+	out = mlx5_vzalloc(outlen);
+	if (!out)
+		return -ENOMEM;
+
+	ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
+	if (ret)
+		goto free;
+
+	for (i = 0; i < port->cnts.num_cong_counters; i++) {
+		stats->value[i + offset] =
+			be64_to_cpup((__be64 *)(out +
+				     port->cnts.offsets[i + offset]));
+	}
+
+free:
+	kvfree(out);
+	return ret;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+				struct rdma_hw_stats *stats,
+				u8 port_num, int index)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_port *port = &dev->port[port_num - 1];
+	int ret, num_counters;
+
+	if (!stats)
+		return -EINVAL;
+
+	ret = mlx5_ib_query_q_counters(dev, port, stats);
+	if (ret)
+		return ret;
+	num_counters = port->cnts.num_q_counters;
+
+	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+		ret = mlx5_ib_query_cong_counters(dev, port, stats);
+		if (ret)
+			return ret;
+		num_counters += port->cnts.num_cong_counters;
+	}
+
+	return num_counters;
 }
 
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3523,14 +3591,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_rsrc;
 
 	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
-		err = mlx5_ib_alloc_q_counters(dev);
+		err = mlx5_ib_alloc_counters(dev);
 		if (err)
 			goto err_odp;
 	}
 
 	dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
 	if (!dev->mdev->priv.uar)
-		goto err_q_cnt;
+		goto err_cnt;
 
 	err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
 	if (err)
@@ -3574,9 +3642,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 err_uar_page:
 	mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 
-err_q_cnt:
+err_cnt:
 	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
-		mlx5_ib_dealloc_q_counters(dev);
+		mlx5_ib_dealloc_counters(dev);
 
 err_odp:
 	mlx5_ib_odp_remove_one(dev);
@@ -3610,7 +3678,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 	mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 	mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
 	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
-		mlx5_ib_dealloc_q_counters(dev);
+		mlx5_ib_dealloc_counters(dev);
 	destroy_umrc_res(dev);
 	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 3cd064b5f0bf..7f1221cb6a50 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -595,15 +595,16 @@ struct mlx5_ib_resources {
 	struct mutex	mutex;
 };
 
-struct mlx5_ib_q_counters {
+struct mlx5_ib_counters {
 	const char **names;
 	size_t *offsets;
-	u32 num_counters;
+	u32 num_q_counters;
+	u32 num_cong_counters;
 	u16 set_id;
 };
 
 struct mlx5_ib_port {
-	struct mlx5_ib_q_counters q_cnts;
+	struct mlx5_ib_counters cnts;
 };
 
 struct mlx5_roce {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ad8a2638e339..74a4696b4833 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2798,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 					       qp->port) - 1;
 		mibport = &dev->port[port_num];
 		context->qp_counter_set_usr_page |=
-			cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
+			cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
 	}
 
 	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2826,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		raw_qp_param.operation = op;
 
 		if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
-			raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
+			raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
 			raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
 		}
 
@@ -4964,7 +4964,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
 			MLX5_SET64(modify_rq_in, in, modify_bitmask,
 				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
-			MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+			MLX5_SET(rqc, rqc, counter_set_id,
+				 dev->port->cnts.set_id);
 		} else
 			pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
 				     dev->ib_dev.name);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 838242697541..fa22a8f2ad27 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4623,17 +4623,17 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
 
 	u8         reserved_at_40[0x40];
 
-	u8         cur_flows[0x20];
+	u8         rp_cur_flows[0x20];
 
 	u8         sum_flows[0x20];
 
-	u8         cnp_ignored_high[0x20];
+	u8         rp_cnp_ignored_high[0x20];
 
-	u8         cnp_ignored_low[0x20];
+	u8         rp_cnp_ignored_low[0x20];
 
-	u8         cnp_handled_high[0x20];
+	u8         rp_cnp_handled_high[0x20];
 
-	u8         cnp_handled_low[0x20];
+	u8         rp_cnp_handled_low[0x20];
 
 	u8         reserved_at_140[0x100];
 
@@ -4643,13 +4643,13 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
 
 	u8         accumulators_period[0x20];
 
-	u8         ecn_marked_roce_packets_high[0x20];
+	u8         np_ecn_marked_roce_packets_high[0x20];
 
-	u8         ecn_marked_roce_packets_low[0x20];
+	u8         np_ecn_marked_roce_packets_low[0x20];
 
-	u8         cnps_sent_high[0x20];
+	u8         np_cnps_sent_high[0x20];
 
-	u8         cnps_sent_low[0x20];
+	u8         np_cnps_sent_low[0x20];
 
 	u8         reserved_at_320[0x560];
 };
 
-- 
2.12.2
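
As a usage reference (not part of the patch): once this is applied, the new
congestion counters appear alongside the existing q_counters under the
per-port hw_counters sysfs directory mentioned in the changelog. Below is a
minimal userspace sketch that reads one of them; the device name mlx5_0 and
port number 1 are illustrative assumptions only.

	/* Hypothetical reader for one of the new congestion counters.
	 * The device name and port number are assumptions for illustration;
	 * the path layout follows the standard RDMA hw_counters convention.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		const char *path =
			"/sys/class/infiniband/mlx5_0/ports/1/hw_counters/np_cnp_sent";
		unsigned long long val;
		FILE *f = fopen(path, "r");

		if (!f) {
			perror("fopen");
			return EXIT_FAILURE;
		}
		if (fscanf(f, "%llu", &val) != 1) {
			fclose(f);
			return EXIT_FAILURE;
		}
		printf("np_cnp_sent: %llu\n", val);
		fclose(f);
		return EXIT_SUCCESS;
	}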