From: Chiara Meiohas <cmeiohas@xxxxxxxxxx> Introduce a new command for RDMA event monitoring. This patch adds a new attribute "event_type" which describes the event received. Add a new NETLINK_RDMA multicast group; processes listening to this multicast group receive RDMA events. The event types supported are IB device registration/unregistration and net device attachment/detachment. Example output of rdma monitor and the commands which trigger the events: $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 3 [UNREGISTER] dev 0 $ modprobe mlx5_ib [REGISTER] dev 4 [NETDEV_ATTACH] dev 4 port 1 netdev 4 [REGISTER] dev 5 [NETDEV_ATTACH] dev 5 port 1 netdev 5 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 4 [REGISTER] dev 6 [NETDEV_ATTACH] dev 6 port 6 netdev 4 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 6 port 2 netdev 7 [NETDEV_ATTACH] dev 6 port 3 netdev 8 [NETDEV_ATTACH] dev 6 port 4 netdev 9 [NETDEV_ATTACH] dev 6 port 5 netdev 10 [REGISTER] dev 7 [NETDEV_ATTACH] dev 7 port 1 netdev 11 [REGISTER] dev 8 [NETDEV_ATTACH] dev 8 port 1 netdev 12 [REGISTER] dev 9 [NETDEV_ATTACH] dev 9 port 1 netdev 13 [REGISTER] dev 10 [NETDEV_ATTACH] dev 10 port 1 netdev 14 $ echo 0 > /sys/class/net/eth2/device/sriov_numvfs [UNREGISTER] dev 7 [UNREGISTER] dev 8 [UNREGISTER] dev 9 [UNREGISTER] dev 10 [NETDEV_DETACH] dev 6 port 2 [NETDEV_DETACH] dev 6 port 3 [NETDEV_DETACH] dev 6 port 4 [NETDEV_DETACH] dev 6 port 5 Signed-off-by: Chiara Meiohas <cmeiohas@xxxxxxxxxx> Signed-off-by: Michael Guralnik <michaelgur@xxxxxxxxxx> --- include/mnl_utils.h | 1 + lib/mnl_utils.c | 5 ++ man/man8/rdma-monitor.8 | 51 ++++++++++++ man/man8/rdma.8 | 7 +- rdma/Makefile | 3 +- rdma/monitor.c | 167 ++++++++++++++++++++++++++++++++++++++++ rdma/rdma.c | 3 +- rdma/rdma.h | 1 + rdma/utils.c | 1 + 9 files changed, 236 insertions(+), 3 deletions(-) create mode 100644 man/man8/rdma-monitor.8 create mode 100644 rdma/monitor.c diff --git a/include/mnl_utils.h 
b/include/mnl_utils.h index 76fe1dfe..0ddf2932 100644 --- a/include/mnl_utils.h +++ b/include/mnl_utils.h @@ -24,6 +24,7 @@ int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *n mnl_cb_t data_cb, void *data); struct mnl_socket *mnlu_socket_open(int bus); +int mnl_add_nl_group(struct mnl_socket *nl, unsigned int group); struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags, void *extra_header, size_t extra_header_size); int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size, diff --git a/lib/mnl_utils.c b/lib/mnl_utils.c index 6c8f527e..5f6671bf 100644 --- a/lib/mnl_utils.c +++ b/lib/mnl_utils.c @@ -35,6 +35,11 @@ err_bind: return NULL; } +int mnl_add_nl_group(struct mnl_socket *nl, unsigned int group) +{ + return mnl_socket_bind(nl, group, MNL_SOCKET_AUTOPID); +} + struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags, void *extra_header, size_t extra_header_size) { diff --git a/man/man8/rdma-monitor.8 b/man/man8/rdma-monitor.8 new file mode 100644 index 00000000..d445cba0 --- /dev/null +++ b/man/man8/rdma-monitor.8 @@ -0,0 +1,51 @@ +.TH RDMA\-MONITOR 8 "22 Jul 2024" "iproute2" "Linux" +.SH NAME +rdma-monitor \- RDMA events monitoring +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B rdma +.RI "[ " OPTIONS " ]" +.B monitor +.RI " { " help " }" +.sp + +.ti -8 +.IR OPTIONS " := { " +\fB\-V\fR[\fIersion\fR] } + +.ti -8 +.B rdma monitor + +.ti -8 +.B rdma monitor help + +.SH "DESCRIPTION" +.SS rdma monitor - utility can monitor RDMA device events on all RDMA devices. +.PP +.B rdma +opens an RDMA Netlink socket, listens on it and dumps the event info. + +The event types supported are RDMA device registration/unregistration +and net device attachment/detachment. 
+ +.SH "EXAMPLES" +.PP +rdma monitor +.RS 4 +Listen for events of all RDMA devices +.RE +.PP + +.SH SEE ALSO +.BR rdma (8), +.BR rdma-link (8), +.BR rdma-resource (8), +.BR rdma-system (8), +.BR rdma-statistic (8), +.br + +.SH AUTHOR +Chiara Meiohas <cmeiohas@xxxxxxxxxx> diff --git a/man/man8/rdma.8 b/man/man8/rdma.8 index 5088b9ec..df86284d 100644 --- a/man/man8/rdma.8 +++ b/man/man8/rdma.8 @@ -19,7 +19,7 @@ rdma \- RDMA tool .ti -8 .IR OBJECT " := { " -.BR dev " | " link " | " resource " | " system " | " statistic " }" +.BR dev " | " link " | " resource " | " system " | " statistic " | " monitor " }" .sp .ti -8 @@ -94,6 +94,10 @@ character. .B statistic - RDMA counter statistic related. +.TP +.B monitor +- RDMA events monitor + .PP The names of all objects may be written in full or abbreviated form, for example @@ -133,6 +137,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR rdma-resource (8), .BR rdma-system (8), .BR rdma-statistic (8), +.BR rdma-monitor (8), .br .SH REPORTING BUGS diff --git a/rdma/Makefile b/rdma/Makefile index 37d904a7..ed3c1c1c 100644 --- a/rdma/Makefile +++ b/rdma/Makefile @@ -4,7 +4,8 @@ include ../config.mk CFLAGS += -I./include/uapi/ RDMA_OBJ = rdma.o utils.o dev.o link.o res.o res-pd.o res-mr.o res-cq.o \ - res-cmid.o res-qp.o sys.o stat.o stat-mr.o res-ctx.o res-srq.o + res-cmid.o res-qp.o sys.o stat.o stat-mr.o res-ctx.o res-srq.o \ + monitor.o TARGETS += rdma diff --git a/rdma/monitor.c b/rdma/monitor.c new file mode 100644 index 00000000..d74727a0 --- /dev/null +++ b/rdma/monitor.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * monitor.c RDMA tool + * Authors: Chiara Meiohas <cmeiohas@xxxxxxxxxx> + */ + +#include "rdma.h" + +/* Global utils flags */ +extern int json; + +static const char *event_type_to_str(uint8_t etype) +{ + static const char *const event_types_str[] = { "[REGISTER]", + "[UNREGISTER]", + "[NETDEV_ATTACH]", + "[NETDEV_DETACH]" }; + + if (etype < 
ARRAY_SIZE(event_types_str)) + return event_types_str[etype]; + + return "[UNKNOWN]"; +} + +static int mon_show_rdma_register(struct nlattr **tb) +{ + enum rdma_nl_event_type etype; + uint32_t rdma_idx; + + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) + return MNL_CB_ERROR; + + rdma_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]); + + open_json_object(NULL); + print_string(PRINT_ANY, "event_type", "%s\t", + event_type_to_str(etype)); + print_uint(PRINT_ANY, "rdma_index", "dev %u", rdma_idx); + close_json_object(); + newline(); + fflush(stdout); + + return MNL_CB_OK; +} + +static int mon_show_netdev_association(struct nlattr **tb) +{ + uint32_t rdma_idx, port, net_idx; + enum rdma_nl_event_type etype; + + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { + return MNL_CB_ERROR; + } + + rdma_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]); + + open_json_object(NULL); + print_string(PRINT_ANY, "event_type", "%s\t", event_type_to_str(etype)); + print_uint(PRINT_ANY, "rdma_index", "dev %u", rdma_idx); + print_uint(PRINT_ANY, "port", " port %u", port); + + if (etype == RDMA_NETDEV_ATTACH_EVENT) { + net_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_NDEV_INDEX]); + print_uint(PRINT_ANY, "netdev_index", " netdev %u", net_idx); + } + close_json_object(); + newline(); + fflush(stdout); + + return MNL_CB_OK; +} + +static int mon_show_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX + 1] = {}; + enum rdma_nl_event_type etype; + + mnl_attr_parse(nlh, 0, rd_attr_cb, tb); + if (!tb[RDMA_NLDEV_ATTR_EVENT_TYPE]) + return MNL_CB_ERROR; + + etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]); + + switch (etype) { + case RDMA_REGISTER_EVENT: + case RDMA_UNREGISTER_EVENT: + return mon_show_rdma_register(tb); + case RDMA_NETDEV_ATTACH_EVENT: + case 
RDMA_NETDEV_DETACH_EVENT: + return mon_show_netdev_association(tb); + default: + return MNL_CB_ERROR; + } + return MNL_CB_OK; +} + +static int mon_show(struct rd* rd) +{ + unsigned int groups = 0; + int one = 1; + char *buf; + int err; + + buf = malloc(MNL_SOCKET_BUFFER_SIZE); + if (!buf) { + printf("Buffer allocation failed\n"); + return -ENOMEM; + } + + rd->nl = mnl_socket_open(NETLINK_RDMA); + if (!rd->nl) { + pr_err("Failed to open NETLINK_RDMA socket. Error: %s\n", + strerror(errno)); + err = -ENODEV; + goto err_free; + } + mnl_socket_setsockopt(rd->nl, NETLINK_CAP_ACK, &one, sizeof(one)); + mnl_socket_setsockopt(rd->nl, NETLINK_EXT_ACK, &one, sizeof(one)); + + groups |= nl_mgrp(RDMA_NL_GROUP_NOTIFY); + + err = mnl_add_nl_group(rd->nl, groups); + if (err < 0) { + pr_err("Failed to add NETLINK_RDMA multicast group. Error: %s\n", + strerror(errno)); + goto err_close; + } + new_json_obj(json); + + err = mnlu_socket_recv_run(rd->nl, 0, buf, MNL_SOCKET_BUFFER_SIZE, + mon_show_cb, rd); + if (err) { + pr_err("Failed to listen to rdma socket\n"); + goto err_free_json; + } + + return 0; + +err_free_json: + delete_json_obj(); +err_close: + mnl_socket_close(rd->nl); +err_free: + free(buf); + return err; +} + +static int mon_help(struct rd *rd) +{ + pr_out("Usage: rdma monitor [ -j ]\n"); + return 0; +} + +int cmd_mon(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, mon_show }, + { "help", mon_help }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "mon command"); +} + diff --git a/rdma/rdma.c b/rdma/rdma.c index 131c6b2a..253ac58b 100644 --- a/rdma/rdma.c +++ b/rdma/rdma.c @@ -15,7 +15,7 @@ static void help(char *name) { pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n" " %s [ -f[orce] ] -b[atch] filename\n" - "where OBJECT := { dev | link | resource | system | statistic | help }\n" + "where OBJECT := { dev | link | resource | monitor | system | statistic | help }\n" " OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty] | -r[aw]}\n", name, 
name); } @@ -35,6 +35,7 @@ static int rd_cmd(struct rd *rd, int argc, char **argv) { "resource", cmd_res }, { "system", cmd_sys }, { "statistic", cmd_stat }, + { "monitor", cmd_mon }, { 0 } }; diff --git a/rdma/rdma.h b/rdma/rdma.h index d224ec57..fb037bcf 100644 --- a/rdma/rdma.h +++ b/rdma/rdma.h @@ -98,6 +98,7 @@ int cmd_link(struct rd *rd); int cmd_res(struct rd *rd); int cmd_sys(struct rd *rd); int cmd_stat(struct rd *rd); +int cmd_mon(struct rd* rd); int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str); int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd)); int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd)); diff --git a/rdma/utils.c b/rdma/utils.c index 4d3803b5..bc104e0f 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -477,6 +477,7 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = MNL_TYPE_U8, [RDMA_NLDEV_ATTR_DEV_TYPE] = MNL_TYPE_U8, [RDMA_NLDEV_ATTR_PARENT_NAME] = MNL_TYPE_STRING, + [RDMA_NLDEV_ATTR_EVENT_TYPE] = MNL_TYPE_U8, }; static int rd_attr_check(const struct nlattr *attr, int *typep) -- 2.17.2