From: Neta Ostrovsky <netao@xxxxxxxxxx> This patch adds the ability to return per-port optional counter statistics through RDMA netlink. Examples: $ rdma statistic show link rocep8s0f0/1 link rocep8s0f0/1 rx_write_requests 0 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 out_of_sequence 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0 resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0 req_remote_access_errors 0 resp_remote_access_errors 0 resp_cqe_flush_error 0 req_cqe_flush_error 0 roce_adp_retrans 0 roce_adp_retrans_to 0 roce_slow_restart 0 roce_slow_restart_cnps 0 roce_slow_restart_trans 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0 rx_icrc_encapsulated 0 Optional-set: cc_rx_ce_pkts 0 Signed-off-by: Aharon Landau <aharonl@xxxxxxxxxx> Signed-off-by: Neta Ostrovsky <netao@xxxxxxxxxx> Signed-off-by: Mark Zhang <markzhang@xxxxxxxxxx> --- drivers/infiniband/core/counters.c | 18 +++++ drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/nldev.c | 117 +++++++++++++++++++++++------ include/rdma/ib_verbs.h | 3 + include/rdma/rdma_counter.h | 3 + 5 files changed, 119 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index fa04178aa0eb..5f7a12b8f1bb 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -459,6 +459,24 @@ u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index) return sum; } +/* + * rdma_opcounter_query_stats - Query the per-port optional counter values + */ +int rdma_opcounter_query_stats(struct rdma_op_stats *opstats, + struct ib_device *dev, u32 port) +{ + int ret = 0; + + if (!dev->ops.get_op_stats) + return -EOPNOTSUPP; + + mutex_lock(&opstats->lock); + ret = dev->ops.get_op_stats(dev, port, opstats); + mutex_unlock(&opstats->lock); + + return ret; +} + static struct ib_qp 
*rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) { struct rdma_restrack_entry *res = NULL; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b9138f20f9a8..efd4b75b7752 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2657,6 +2657,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_link_layer); SET_DEVICE_OP(dev_ops, get_netdev); SET_DEVICE_OP(dev_ops, get_numa_node); + SET_DEVICE_OP(dev_ops, get_op_stats); SET_DEVICE_OP(dev_ops, get_port_immutable); SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 17d55d89f11c..b665651dfb1d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -945,6 +945,30 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, } EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); +static int rdma_nl_stat_opcounter_entry(struct sk_buff *msg, const char *name, + u64 value) +{ + struct nlattr *entry_attr; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME, + name)) + goto err; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE, + value, RDMA_NLDEV_ATTR_PAD)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); + return -EMSGSIZE; +} + static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -2124,15 +2148,52 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } +static int stat_get_optional_counter(struct sk_buff *msg, + struct ib_device *device, u32 port) +{ + struct rdma_op_stats *opstats; + struct nlattr *opstats_table; + int i, ret = 0; 
+ + opstats = device->port_data[port].port_counter.opstats; + if (!opstats) + return 0; + + ret = rdma_opcounter_query_stats(opstats, device, port); + if (ret) + return ret; + + opstats_table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTERS); + if (!opstats_table) + return -EMSGSIZE; + + for (i = 0; i < opstats->num_opcounters; i++) { + if (!(opstats->opcounters[i].enabled)) + continue; + ret = rdma_nl_stat_opcounter_entry(msg, + opstats->opcounters[i].name, + opstats->opcounters[i].value); + if (ret) + goto err; + } + nla_nest_end(msg, opstats_table); + + return 0; + +err: + nla_nest_cancel(msg, opstats_table); + return ret; +} + static int stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct nlattr *tb[]) { - struct rdma_hw_stats *stats; - struct nlattr *table_attr; + struct rdma_hw_stats *hwstats; + struct nlattr *hwstats_table; struct ib_device *device; - int ret, num_cnts, i; + int ret, num_hwstats, i; struct sk_buff *msg; u32 index, port; u64 v; @@ -2145,14 +2206,19 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, if (!device) return -EINVAL; + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { ret = -EINVAL; goto err; } - port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); - stats = ib_get_hw_stats_port(device, port); - if (!stats) { + hwstats = ib_get_hw_stats_port(device, port); + if (!hwstats) { ret = -EINVAL; goto err; } @@ -2174,38 +2240,43 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, goto err_msg; } - mutex_lock(&stats->lock); + mutex_lock(&hwstats->lock); - num_cnts = device->ops.get_hw_stats(device, stats, port, 0); - if (num_cnts < 0) { + num_hwstats = device->ops.get_hw_stats(device, hwstats, port, 0); + if (num_hwstats < 0) { ret = -EINVAL; - goto err_stats; + goto err_hwstats; } - table_attr = 
nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); - if (!table_attr) { + hwstats_table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!hwstats_table) { ret = -EMSGSIZE; - goto err_stats; + goto err_hwstats; } - for (i = 0; i < num_cnts; i++) { - v = stats->value[i] + + for (i = 0; i < num_hwstats; i++) { + v = hwstats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, hwstats->names[i], v)) { ret = -EMSGSIZE; - goto err_table; + goto err_hwstats_table; } } - nla_nest_end(msg, table_attr); + nla_nest_end(msg, hwstats_table); + + mutex_unlock(&hwstats->lock); + + ret = stat_get_optional_counter(msg, device, port); + if (ret) + goto err_msg; - mutex_unlock(&stats->lock); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); -err_table: - nla_nest_cancel(msg, table_attr); -err_stats: - mutex_unlock(&stats->lock); +err_hwstats_table: + nla_nest_cancel(msg, hwstats_table); +err_hwstats: + mutex_unlock(&hwstats->lock); err_msg: nlmsg_free(msg); err: diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fa9e668b9b14..d85f2e842a1d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2602,6 +2602,9 @@ struct ib_device_ops { int optional_stat); int (*remove_op_stat)(struct ib_device *device, u32 port, int optional_stat); + int (*get_op_stats)(struct ib_device *device, u32 port, + struct rdma_op_stats *stats); + /** * Allows rdma drivers to add their own restrack attributes. 
*/ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index 48086a7248ac..31686a234c77 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -67,5 +67,8 @@ int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name); int rdma_opcounter_remove(struct ib_device *dev, u32 port, const char *name); +int rdma_opcounter_query_stats(struct rdma_op_stats *opstats, + struct ib_device *dev, u32 port); + #endif /* _RDMA_COUNTER_H_ */ -- 2.26.2