From: Aharon Landau <aharonl@xxxxxxxxxx> This patch adds the ability to add an optional counter to, or remove one from, a link through RDMA netlink. These commands are limited to users with the ADMIN capability. Examples: $ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts $ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts Signed-off-by: Aharon Landau <aharonl@xxxxxxxxxx> Signed-off-by: Neta Ostrovsky <netao@xxxxxxxxxx> Signed-off-by: Mark Zhang <markzhang@xxxxxxxxxx> --- drivers/infiniband/core/counters.c | 50 ++++++++++++++++ drivers/infiniband/core/device.c | 2 + drivers/infiniband/core/nldev.c | 93 ++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 7 +++ include/rdma/rdma_counter.h | 4 ++ include/rdma/rdma_netlink.h | 1 + include/uapi/rdma/rdma_netlink.h | 9 +++ 7 files changed, 166 insertions(+) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index b8b6db98bfdf..fa04178aa0eb 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -106,6 +106,56 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter, return ret; } +static struct rdma_op_counter *get_opcounter(struct rdma_op_stats *opstats, + const char *name) +{ + int i; + + for (i = 0; i < opstats->num_opcounters; i++) + if (!strcmp(opstats->opcounters[i].name, name)) + return opstats->opcounters + i; + + return NULL; +} + +static int rdma_opcounter_set(struct ib_device *dev, u32 port, + const char *name, bool is_add) +{ + struct rdma_port_counter *port_counter; + struct rdma_op_counter *opc; + int ret; + + if (!dev->ops.add_op_stat || !dev->ops.remove_op_stat) + return -EOPNOTSUPP; + + port_counter = &dev->port_data[port].port_counter; + opc = get_opcounter(port_counter->opstats, name); + if (!opc) + return -EINVAL; + + mutex_lock(&port_counter->opstats->lock); + ret = is_add ? 
dev->ops.add_op_stat(dev, port, opc->type) : + dev->ops.remove_op_stat(dev, port, opc->type); + if (ret) + goto end; + + opc->enabled = is_add; +end: + mutex_unlock(&port_counter->opstats->lock); + return ret; +} + +int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name) +{ + return rdma_opcounter_set(dev, port, name, true); +} + +int rdma_opcounter_remove(struct ib_device *dev, u32 port, + const char *name) +{ + return rdma_opcounter_set(dev, port, name, false); +} + static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, struct ib_qp *qp, enum rdma_nl_counter_mode mode) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 23e1ae50b2e4..b9138f20f9a8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2590,6 +2590,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) ops->uverbs_no_driver_id_binding; SET_DEVICE_OP(dev_ops, add_gid); + SET_DEVICE_OP(dev_ops, add_op_stat); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); @@ -2701,6 +2702,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, reg_dm_mr); SET_DEVICE_OP(dev_ops, reg_user_mr); SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf); + SET_DEVICE_OP(dev_ops, remove_op_stat); SET_DEVICE_OP(dev_ops, req_notify_cq); SET_DEVICE_OP(dev_ops, rereg_user_mr); SET_DEVICE_OP(dev_ops, resize_cq); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e9b4b2cccaa0..17d55d89f11c 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -154,6 +154,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_STAT_OPCOUNTERS] = { .type 
= NLA_NESTED }, + [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE }, + [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -1888,6 +1893,86 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int nldev_stat_set_op_stat(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + bool cmd_add) +{ + char opcounter[RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE] = {}; + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + struct sk_buff *msg; + u32 index, port; + int ret; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + + if (ret || !tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] || + !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), index); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + nla_strscpy(opcounter, tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME], + sizeof(opcounter)); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + (cmd_add ? 
+ RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER : + RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)), + 0, 0); + + if (cmd_add) + ret = rdma_opcounter_add(device, port, opcounter); + else + ret = rdma_opcounter_remove(device, port, opcounter); + if (ret) + goto err_msg; + + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(sock_net(skb->sk), msg, + NETLINK_CB(skb).portid); + +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + +static int nldev_stat_add_op_stat_doit(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return nldev_stat_set_op_stat(skb, nlh, extack, true); +} + +static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return nldev_stat_set_op_stat(skb, nlh, extack, false); +} + static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2342,6 +2427,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .dump = nldev_res_get_mr_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = { + .doit = nldev_stat_add_op_stat_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = { + .doit = nldev_stat_remove_op_stat_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; void __init nldev_init(void) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 40b0f7825975..fa9e668b9b14 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -600,11 +600,14 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( /** * struct rdma_op_counter + * @enabled - To indicate if this counter is currently enabled (as optional + * counters can be dynamically enabled/disabled) * @type - The vendor-specific type of the counter * @name - The name of the counter * @value - The value of the counter */ struct rdma_op_counter { + bool enabled; int type; const char *name; u64 value; @@ -2595,6 +2598,10 
@@ struct ib_device_ops { struct rdma_op_stats *(*alloc_op_port_stats)(struct ib_device *device, u32 port_num); + int (*add_op_stat)(struct ib_device *device, u32 port, + int optional_stat); + int (*remove_op_stat)(struct ib_device *device, u32 port, + int optional_stat); /** * Allows rdma drivers to add their own restrack attributes. */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index 3531c5061718..48086a7248ac 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -63,5 +63,9 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask); +int rdma_opcounter_add(struct ib_device *dev, u32 port, + const char *name); +int rdma_opcounter_remove(struct ib_device *dev, u32 port, + const char *name); #endif /* _RDMA_COUNTER_H_ */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 2758d9df71ee..ac47a0cc0508 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -10,6 +10,7 @@ enum { RDMA_NLDEV_ATTR_EMPTY_STRING = 1, RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32, + RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE = 64, }; struct rdma_nl_cbs { diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 75a1ae2311d8..79e6ca87d2e0 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -297,6 +297,10 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */ + RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER, + + RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER, + RDMA_NLDEV_NUM_OPS }; @@ -549,6 +553,11 @@ enum rdma_nldev_attr { RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* u8 */ + RDMA_NLDEV_ATTR_STAT_OPCOUNTERS, /* nested table */ + RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME, /* string */ + RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE, /* u64 */ 
+ /* * Always the end */ -- 2.26.2