Hi Doug,

For some reason an older version of this patch was posted. My bad. The
Patchwork state for this patch still shows as new, so I guess it has not been
merged yet. If you have not merged it, please drop this patch; I am sending
the v1 version of it.

Parav

> -----Original Message-----
> From: Leon Romanovsky [mailto:leon@xxxxxxxxxx]
> Sent: Tuesday, March 28, 2017 10:01 PM
> To: Doug Ledford <dledford@xxxxxxxxxx>
> Cc: linux-rdma@xxxxxxxxxxxxxxx; Parav Pandit <parav@xxxxxxxxxxxx>
> Subject: [PATCH rdma-next] IB/mlx5: Support congestion related counters
>
> From: Parav Pandit <parav@xxxxxxxxxxxx>
>
> This patch adds support for querying the congestion related hardware
> counters through a new command and links them with the other hw counters
> available in the hw_counters sysfs location.
>
> In order to reuse the existing infrastructure, it renames the q_counter
> related data structures to the more generic "counters", reflecting that
> they now cover q_counters, congestion counters, and possibly other
> counters in the future.
>
> New hardware counters:
> * rp_cnp_handled - CNP packets handled by the reaction point
> * rp_cnp_ignored - CNP packets ignored by the reaction point
> * np_cnps_sent - CNP packets sent by the notification point to respond to
>   CE marked RoCE packets
> * np_ecn_marked_roce_packets - CE marked RoCE packets received by the
>   notification point
>
> It also avoids returning ENOSYS, which is specific to an invalid system
> call and produces the following checkpatch.pl warning:
>
> WARNING: ENOSYS means 'invalid syscall nr' and nothing else
> +		return -ENOSYS;
>
> Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
> Reviewed-by: Eli Cohen <eli@xxxxxxxxxxxx>
> Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
> ---
>  drivers/infiniband/hw/mlx5/cmd.c     |  11 +++
>  drivers/infiniband/hw/mlx5/cmd.h     |   2 +
>  drivers/infiniband/hw/mlx5/main.c    | 171 ++++++++++++++++++++++++-----------
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |   7 +-
>  drivers/infiniband/hw/mlx5/qp.c      |   7 +-
>  include/linux/mlx5/mlx5_ifc.h        |  18 ++--
>  6 files changed, 149 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
> index cdc2d3017da7..18d5e1db93ed 100644
> --- a/drivers/infiniband/hw/mlx5/cmd.c
> +++ b/drivers/infiniband/hw/mlx5/cmd.c
> @@ -46,3 +46,14 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
>  			      null_mkey);
>  	return err;
>  }
> +
> +int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
> +				bool reset, void *out, int out_size)
> +{
> +	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
> +
> +	MLX5_SET(query_cong_statistics_in, in, opcode,
> +		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
> +	MLX5_SET(query_cong_statistics_in, in, clear, reset);
> +	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
> +}
> diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
> index 7ca8a7b6434d..fa09228193a6 100644
> --- a/drivers/infiniband/hw/mlx5/cmd.h
> +++ b/drivers/infiniband/hw/mlx5/cmd.h
> @@ -37,4 +37,6 @@
>  #include <linux/mlx5/driver.h>
>
>  int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
> +int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
> +				bool reset, void *out, int out_size);
>  #endif /* MLX5_IB_CMD_H */
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index 4dc0a8785fe0..def740e8a243 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -57,6 +57,7 @@
>  #include <linux/mlx5/fs.h>
>  #include <linux/mlx5/vport.h>
>  #include "mlx5_ib.h"
+#include "cmd.h" > > #define DRIVER_NAME "mlx5_ib" > #define DRIVER_VERSION "2.2-1" > @@ -3133,7 +3134,7 @@ static void mlx5_disable_eth(struct mlx5_ib_dev > *dev) > mlx5_nic_vport_disable_roce(dev->mdev); > } > > -struct mlx5_ib_q_counter { > +struct mlx5_ib_counter { > const char *name; > size_t offset; > }; > @@ -3141,18 +3142,18 @@ struct mlx5_ib_q_counter { > #define INIT_Q_COUNTER(_name) \ > { .name = #_name, .offset = > MLX5_BYTE_OFF(query_q_counter_out, _name)} > > -static const struct mlx5_ib_q_counter basic_q_cnts[] = { > +static const struct mlx5_ib_counter basic_q_cnts[] = { > INIT_Q_COUNTER(rx_write_requests), > INIT_Q_COUNTER(rx_read_requests), > INIT_Q_COUNTER(rx_atomic_requests), > INIT_Q_COUNTER(out_of_buffer), > }; > > -static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = { > +static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { > INIT_Q_COUNTER(out_of_sequence), > }; > > -static const struct mlx5_ib_q_counter retrans_q_cnts[] = { > +static const struct mlx5_ib_counter retrans_q_cnts[] = { > INIT_Q_COUNTER(duplicate_request), > INIT_Q_COUNTER(rnr_nak_retry_err), > INIT_Q_COUNTER(packet_seq_err), > @@ -3160,22 +3161,31 @@ static const struct mlx5_ib_q_counter > retrans_q_cnts[] = { > INIT_Q_COUNTER(local_ack_timeout_err), > }; > > -static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) > +#define INIT_CONG_COUNTER(_name) \ > + { .name = #_name, .offset = \ > + MLX5_BYTE_OFF(query_cong_statistics_out, _name ## > _high)} > + > +static const struct mlx5_ib_counter cong_cnts[] = { > + INIT_CONG_COUNTER(rp_cnp_ignored), > + INIT_CONG_COUNTER(rp_cnp_handled), > + INIT_CONG_COUNTER(np_ecn_marked_roce_packets), > + INIT_CONG_COUNTER(np_cnps_sent), > +}; > + > +static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) > { > unsigned int i; > > for (i = 0; i < dev->num_ports; i++) { > mlx5_core_dealloc_q_counter(dev->mdev, > - dev->port[i].q_cnts.set_id); > - kfree(dev->port[i].q_cnts.names); > - kfree(dev->port[i].q_cnts.offsets); > + dev->port[i].cnts.set_id); > + kfree(dev->port[i].cnts.names); > + kfree(dev->port[i].cnts.offsets); > } > } > > -static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev, > - const char ***names, > - size_t **offsets, > - u32 *num) > +static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, > + struct mlx5_ib_counters *cnts) > { > u32 num_counters; > > @@ -3186,27 +3196,32 @@ static int __mlx5_ib_alloc_q_counters(struct > mlx5_ib_dev *dev, > > if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) > num_counters += ARRAY_SIZE(retrans_q_cnts); > + cnts->num_q_counters = num_counters; > > - *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL); > - if (!*names) > + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { > + cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); > + num_counters += ARRAY_SIZE(cong_cnts); > + } > + > + cnts->names = kcalloc(num_counters, sizeof(cnts->names), > GFP_KERNEL); > + if (!cnts->names) > return -ENOMEM; > > - *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL); > - if (!*offsets) > + cnts->offsets = kcalloc(num_counters, > + sizeof(cnts->offsets), GFP_KERNEL); > + if (!cnts->offsets) > goto err_names; > > - *num = num_counters; > - > return 0; > > err_names: > - kfree(*names); > + kfree(cnts->names); > return -ENOMEM; > } > > -static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev, > - const char **names, > - size_t *offsets) > +static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, > + const char **names, > + size_t *offsets) > { > int i; > int j = 0; > @@ 
> @@ -3229,9 +3244,16 @@ static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
>  			offsets[j] = retrans_q_cnts[i].offset;
>  		}
>  	}
> +
> +	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
> +		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
> +			names[j] = cong_cnts[i].name;
> +			offsets[j] = cong_cnts[i].offset;
> +		}
> +	}
>  }
>
> -static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
> +static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
>  {
>  	int i;
>  	int ret;
> @@ -3240,7 +3262,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
>  		struct mlx5_ib_port *port = &dev->port[i];
>
>  		ret = mlx5_core_alloc_q_counter(dev->mdev,
> -						&port->q_cnts.set_id);
> +						&port->cnts.set_id);
>  		if (ret) {
>  			mlx5_ib_warn(dev,
>  				     "couldn't allocate queue counter for port %d, err %d\n",
> @@ -3248,15 +3270,12 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
>  			goto dealloc_counters;
>  		}
>
> -		ret = __mlx5_ib_alloc_q_counters(dev,
> -						 &port->q_cnts.names,
> -						 &port->q_cnts.offsets,
> -						 &port->q_cnts.num_counters);
> +		ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
>  		if (ret)
>  			goto dealloc_counters;
>
> -		mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
> -					port->q_cnts.offsets);
> +		mlx5_ib_fill_counters(dev, port->cnts.names,
> +				      port->cnts.offsets);
>  	}
>
>  	return 0;
> @@ -3264,7 +3283,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
>  dealloc_counters:
>  	while (--i >= 0)
>  		mlx5_core_dealloc_q_counter(dev->mdev,
> -					    dev->port[i].q_cnts.set_id);
> +					    dev->port[i].cnts.set_id);
>
>  	return ret;
>  }
> @@ -3279,44 +3298,92 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
>  	if (port_num == 0)
>  		return NULL;
>
> -	return rdma_alloc_hw_stats_struct(port->q_cnts.names,
> -					  port->q_cnts.num_counters,
> +	return rdma_alloc_hw_stats_struct(port->cnts.names,
> +					  port->cnts.num_q_counters +
> +					  port->cnts.num_cong_counters,
>  					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
>  }
>
> -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
> -				struct rdma_hw_stats *stats,
> -				u8 port_num, int index)
> +static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
> +				    struct mlx5_ib_port *port,
> +				    struct rdma_hw_stats *stats)
>  {
> -	struct mlx5_ib_dev *dev = to_mdev(ibdev);
> -	struct mlx5_ib_port *port = &dev->port[port_num - 1];
>  	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
>  	void *out;
>  	__be32 val;
> -	int ret;
> -	int i;
> -
> -	if (!stats)
> -		return -ENOSYS;
> +	int ret, i;
>
>  	out = mlx5_vzalloc(outlen);
>  	if (!out)
>  		return -ENOMEM;
>
>  	ret = mlx5_core_query_q_counter(dev->mdev,
> -					port->q_cnts.set_id, 0,
> +					port->cnts.set_id, 0,
>  					out, outlen);
>  	if (ret)
>  		goto free;
>
> -	for (i = 0; i < port->q_cnts.num_counters; i++) {
> -		val = *(__be32 *)(out + port->q_cnts.offsets[i]);
> +	for (i = 0; i < port->cnts.num_q_counters; i++) {
> +		val = *(__be32 *)(out + port->cnts.offsets[i]);
>  		stats->value[i] = (u64)be32_to_cpu(val);
>  	}
>
>  free:
>  	kvfree(out);
> -	return port->q_cnts.num_counters;
> +	return ret;
> +}
> +
> +static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
> +				       struct mlx5_ib_port *port,
> +				       struct rdma_hw_stats *stats)
> +{
> +	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
> +	void *out;
> +	int ret, i, offset = port->cnts.num_q_counters - 1;
> +
> +	out = mlx5_vzalloc(outlen);
> +	if (!out)
> +		return -ENOMEM;
> +
> +	ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
> +	if (ret)
> +		goto free;
> +
> +	for (i = 0; i < port->cnts.num_cong_counters; i++) {
> +		stats->value[i + offset] =
> +			be64_to_cpup((__be64 *)(out +
> +				     port->cnts.offsets[i + offset]));
> +	}
> +
> +free:
> +	kvfree(out);
> +	return ret;
> +}
> +
> +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
> +				struct rdma_hw_stats *stats,
> +				u8 port_num, int index)
> +{
> +	struct mlx5_ib_dev *dev = to_mdev(ibdev);
> +	struct mlx5_ib_port *port = &dev->port[port_num - 1];
> +	int ret, num_counters;
> +
> +	if (!stats)
> +		return -EINVAL;
> +
> +	ret = mlx5_ib_query_q_counters(dev, port, stats);
> +	if (ret)
> +		return ret;
> +	num_counters = port->cnts.num_q_counters;
> +
> +	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
> +		ret = mlx5_ib_query_cong_counters(dev, port, stats);
> +		if (ret)
> +			return ret;
> +		num_counters += port->cnts.num_cong_counters;
> +	}
> +
> +	return num_counters;
>  }
>
>  static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
> @@ -3523,14 +3590,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
>  		goto err_rsrc;
>
>  	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
> -		err = mlx5_ib_alloc_q_counters(dev);
> +		err = mlx5_ib_alloc_counters(dev);
>  		if (err)
>  			goto err_odp;
>  	}
>
>  	dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
>  	if (!dev->mdev->priv.uar)
> -		goto err_q_cnt;
> +		goto err_cnt;
>
>  	err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
>  	if (err)
> @@ -3574,9 +3641,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
>  err_uar_page:
>  	mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
>
> -err_q_cnt:
> +err_cnt:
>  	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
> -		mlx5_ib_dealloc_q_counters(dev);
> +		mlx5_ib_dealloc_counters(dev);
>
>  err_odp:
>  	mlx5_ib_odp_remove_one(dev);
> @@ -3610,7 +3677,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
>  	mlx5_free_bfreg(dev->mdev, &dev->bfreg);
>  	mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
>  	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
> -		mlx5_ib_dealloc_q_counters(dev);
> +		mlx5_ib_dealloc_counters(dev);
>  	destroy_umrc_res(dev);
>  	mlx5_ib_odp_remove_one(dev);
>  	destroy_dev_resources(&dev->devr);
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index 3cd064b5f0bf..7f1221cb6a50 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -595,15 +595,16 @@ struct mlx5_ib_resources {
>  	struct mutex	mutex;
>  };
>
> -struct mlx5_ib_q_counters {
> +struct mlx5_ib_counters {
>  	const char **names;
>  	size_t *offsets;
> -	u32 num_counters;
> +	u32 num_q_counters;
> +	u32 num_cong_counters;
>  	u16 set_id;
>  };
>
>  struct mlx5_ib_port {
> -	struct mlx5_ib_q_counters q_cnts;
> +	struct mlx5_ib_counters cnts;
>  };
>
>  struct mlx5_roce {
> diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
> index ad8a2638e339..74a4696b4833 100644
> --- a/drivers/infiniband/hw/mlx5/qp.c
> +++ b/drivers/infiniband/hw/mlx5/qp.c
> @@ -2798,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
>  			       qp->port) - 1;
>  		mibport = &dev->port[port_num];
>  		context->qp_counter_set_usr_page |=
> -			cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
> +			cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
>  	}
>
>  	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
> @@ -2826,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
>
>  		raw_qp_param.operation = op;
>  		if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
> -			raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
> +			raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
>  			raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
>  		}
>
> @@ -4964,7 +4964,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
>  		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
>  			MLX5_SET64(modify_rq_in, in, modify_bitmask,
>  				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
> -			MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
> +			MLX5_SET(rqc, rqc, counter_set_id,
> +				 dev->port->cnts.set_id);
>  		} else
>  			pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
>  				     dev->ib_dev.name);
> diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
> index 838242697541..fa22a8f2ad27 100644
> --- a/include/linux/mlx5/mlx5_ifc.h
> +++ b/include/linux/mlx5/mlx5_ifc.h
> @@ -4623,17 +4623,17 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
>
>  	u8         reserved_at_40[0x40];
>
> -	u8         cur_flows[0x20];
> +	u8         rp_cur_flows[0x20];
>
>  	u8         sum_flows[0x20];
>
> -	u8         cnp_ignored_high[0x20];
> +	u8         rp_cnp_ignored_high[0x20];
>
> -	u8         cnp_ignored_low[0x20];
> +	u8         rp_cnp_ignored_low[0x20];
>
> -	u8         cnp_handled_high[0x20];
> +	u8         rp_cnp_handled_high[0x20];
>
> -	u8         cnp_handled_low[0x20];
> +	u8         rp_cnp_handled_low[0x20];
>
>  	u8         reserved_at_140[0x100];
>
> @@ -4643,13 +4643,13 @@ struct mlx5_ifc_query_cong_statistics_out_bits {
>
>  	u8         accumulators_period[0x20];
>
> -	u8         ecn_marked_roce_packets_high[0x20];
> +	u8         np_ecn_marked_roce_packets_high[0x20];
>
> -	u8         ecn_marked_roce_packets_low[0x20];
> +	u8         np_ecn_marked_roce_packets_low[0x20];
>
> -	u8         cnps_sent_high[0x20];
> +	u8         np_cnps_sent_high[0x20];
>
> -	u8         cnps_sent_low[0x20];
> +	u8         np_cnps_sent_low[0x20];
>
>  	u8         reserved_at_320[0x560];
>  };
> --
> 2.12.0
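
For anyone who wants to sanity-check the result after applying this, here is a
minimal userspace sketch (an illustration only, not part of the patch) that
reads the new congestion counters from the per-port hw_counters sysfs
directory, where they show up next to the existing q_counters. The device name
"mlx5_0" and port number 1 below are assumptions for the example; the counter
file names follow the cong_cnts[] table above.

/* Read the congestion counters exposed under hw_counters in sysfs.
 * "mlx5_0" and port 1 are illustrative assumptions; adjust for the
 * device under test.
 */
#include <stdio.h>
#include <stdlib.h>

static long long read_hw_counter(const char *dev, int port, const char *name)
{
	char path[256];
	char buf[64];
	FILE *f;
	long long val = -1;

	snprintf(path, sizeof(path),
		 "/sys/class/infiniband/%s/ports/%d/hw_counters/%s",
		 dev, port, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fgets(buf, sizeof(buf), f))
		val = strtoll(buf, NULL, 0);
	fclose(f);
	return val;
}

int main(void)
{
	static const char * const cong[] = {
		"rp_cnp_ignored", "rp_cnp_handled",
		"np_ecn_marked_roce_packets", "np_cnps_sent",
	};
	unsigned int i;

	for (i = 0; i < sizeof(cong) / sizeof(cong[0]); i++)
		printf("%-28s %lld\n", cong[i],
		       read_hw_counter("mlx5_0", 1, cong[i]));
	return 0;
}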