Implement an RDMA nldev netlink interface to get detailed CM_ID information. Because cm_ids are attached to rdma devices in various work queue contexts, the pid and task information at device-attach time is sometimes not useful. For example, an NVMe-over-Fabrics (nvme/f) host connection cm_id ends up being bound to a device in a work queue context, and the pid recorded at attach time no longer exists after connection setup. So instead we mark all cm_ids created via rdma_ucm as "user", and all others as "kernel". This required tweaking the restrack code a little. It also required wrapping some rdma_cm functions to allow passing the module name string. Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> --- drivers/infiniband/core/cma.c | 59 ++++++++++++++++--------- drivers/infiniband/core/cma_priv.h | 6 +++ drivers/infiniband/core/nldev.c | 88 +++++++++++++++++++++++++++++++++++++- drivers/infiniband/core/restrack.c | 10 ++++- drivers/infiniband/core/ucma.c | 8 ++-- include/rdma/rdma_cm.h | 18 +++++--- include/rdma/restrack.h | 4 ++ include/uapi/rdma/rdma_netlink.h | 14 ++++++ 8 files changed, 176 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 203519e..e76d0c1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -466,6 +466,9 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); + id_priv->res.type = RDMA_RESTRACK_CM_ID; + id_priv->res.kern_name = id_priv->caller; + rdma_restrack_add(&id_priv->res); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -738,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -struct rdma_cm_id *rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps, - enum ib_qp_type qp_type) 
+struct rdma_cm_id *__rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type, const char *caller) { struct rdma_id_private *id_priv; @@ -749,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net, if (!id_priv) return ERR_PTR(-ENOMEM); - id_priv->owner = task_pid_nr(current); + if (caller) + id_priv->caller = caller; + else + id_priv->owner = task_pid_nr(current); id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; @@ -769,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net, return &id_priv->id; } -EXPORT_SYMBOL(rdma_create_id); +EXPORT_SYMBOL(__rdma_create_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { @@ -1629,6 +1635,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { + rdma_restrack_del(&id_priv->res); if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); @@ -1778,6 +1785,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { + struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; @@ -1787,9 +1795,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, ib_event->param.req_rcvd.primary_path->service_id; int ret; - id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id_priv = container_of(listen_id, struct rdma_id_private, id); + id = __rdma_create_id(listen_id->route.addr.dev_addr.net, listen_id->event_handler, listen_id->context, - listen_id->ps, ib_event->param.req_rcvd.qp_type); + listen_id->ps, ib_event->param.req_rcvd.qp_type, + listen_id_priv->caller); if (IS_ERR(id)) return NULL; @@ -1838,14 +1848,16 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id 
*listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { + struct rdma_id_private *listen_id_priv; struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; struct net *net = listen_id->route.addr.dev_addr.net; int ret; - id = rdma_create_id(net, listen_id->event_handler, listen_id->context, - listen_id->ps, IB_QPT_UD); + listen_id_priv = container_of(listen_id, struct rdma_id_private, id); + id = __rdma_create_id(net, listen_id->event_handler, listen_id->context, + listen_id->ps, IB_QPT_UD, listen_id_priv->caller); if (IS_ERR(id)) return NULL; @@ -2111,10 +2123,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, - listen_id->id.event_handler, - listen_id->id.context, - RDMA_PS_TCP, IB_QPT_RC); + new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP, IB_QPT_RC, + listen_id->caller); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; @@ -2239,8 +2252,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, - id_priv->id.qp_type); + id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, + id_priv->id.qp_type, id_priv->caller); if (IS_ERR(id)) return; @@ -3348,8 +3361,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - if (id_priv->cma_dev) + if (id_priv->cma_dev) { + rdma_restrack_del(&id_priv->res); cma_release_dev(id_priv); + } err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; @@ -3732,14 +3747,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } -int 
rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); - id_priv->owner = task_pid_nr(current); + if (caller) + id_priv->caller = caller; + else + id_priv->owner = task_pid_nr(current); if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; @@ -3779,7 +3798,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) rdma_reject(id, NULL, 0); return ret; } -EXPORT_SYMBOL(rdma_accept); +EXPORT_SYMBOL(__rdma_accept); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 11a41be..a3f208c 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -75,5 +75,11 @@ struct rdma_id_private { u8 reuseaddr; u8 afonly; enum ib_gid_type gid_type; + const char *caller; + + /* + * Internal to RDMA/core, don't use in the drivers + */ + struct rdma_restrack_entry res; }; #endif /* _CMA_PRIV_H */ diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 4f1cfa6..46ce553 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -34,9 +34,11 @@ #include <linux/pid.h> #include <linux/pid_namespace.h> #include <net/netlink.h> +#include <rdma/rdma_cm.h> #include <rdma/rdma_netlink.h> #include "core_priv.h" +#include "cma_priv.h" static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, @@ -71,6 +73,13 @@ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, .len = TASK_COMM_LEN }, + [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, + 
[RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, + [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, }; static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) @@ -182,6 +191,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_PD] = "pd", [RDMA_RESTRACK_CQ] = "cq", [RDMA_RESTRACK_QP] = "qp", + [RDMA_RESTRACK_CM_ID] = "cm_id", }; struct rdma_restrack_root *res = &device->res; @@ -284,6 +294,66 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, return -EMSGSIZE; } +static int fill_res_cm_id_entry(struct sk_buff *msg, + struct netlink_callback *cb, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct rdma_id_private *id_priv = + container_of(res, struct rdma_id_private, res); + struct rdma_cm_id *cm_id = &id_priv->id; + struct nlattr *entry_attr; + + if (port && port != cm_id->port_num) + return 0; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY); + if (!entry_attr) + goto out; + + if (cm_id->port_num && + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) + goto err; + + if (id_priv->qp_num && + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num)) + goto err; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps)) + goto err; + + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)) + goto err; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state)) + goto err; + + if (nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR, + sizeof(cm_id->route.addr.src_addr), + &cm_id->route.addr.src_addr)) + goto err; + if (nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR, + sizeof(cm_id->route.addr.dst_addr), + &cm_id->route.addr.dst_addr)) + goto err; + + if (id_priv->caller) { + if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, + id_priv->caller)) + goto err; + } else { + /* CMA keeps the owning pid. 
*/ + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv->owner)) + goto err; + } + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); +out: + return -EMSGSIZE; +} + static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -583,7 +653,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NULL); /* - * Right now, we are expecting the device index to get QP information, + * Right now, we are expecting the device index to get res information, * but it is possible to extend this code to return all devices in * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX. * if it doesn't exist, we will iterate over all devices. @@ -707,6 +777,12 @@ static int res_get_common_dumpit(struct sk_buff *skb, .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, }, + [RDMA_RESTRACK_CM_ID] = { + .fill_res_func = fill_res_cm_id_entry, + .res_type = RDMA_RESTRACK_CM_ID, + .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, + .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, + }, }; static int nldev_res_get_qp_dumpit(struct sk_buff *skb, @@ -715,6 +791,13 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb, return res_get_common_dumpit(skb, cb, &fill_entries[RDMA_RESTRACK_QP]); } +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return res_get_common_dumpit(skb, cb, + &fill_entries[RDMA_RESTRACK_CM_ID]); +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -741,6 +824,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb, * too. 
*/ }, + [RDMA_NLDEV_CMD_RES_CM_ID_GET] = { + .dump = nldev_res_get_cm_id_dumpit, + }, }; void __init nldev_init(void) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 41a7800..a85d8f5 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -3,12 +3,15 @@ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. */ +#include <rdma/rdma_cm.h> #include <rdma/ib_verbs.h> #include <rdma/restrack.h> #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/pid_namespace.h> +#include "cma_priv.h" + void rdma_restrack_init(struct rdma_restrack_root *res) { init_rwsem(&res->rwsem); @@ -44,7 +47,7 @@ static void set_kern_name(struct rdma_restrack_entry *res) struct ib_qp *qp; if (type != RDMA_RESTRACK_QP) - /* PD and CQ types already have this name embedded in */ + /* Other types already have this name embedded in */ return; qp = container_of(res, struct ib_qp, res); @@ -67,6 +70,9 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) return container_of(res, struct ib_cq, res)->device; case RDMA_RESTRACK_QP: return container_of(res, struct ib_qp, res)->device; + case RDMA_RESTRACK_CM_ID: + return container_of(res, struct rdma_id_private, + res)->id.device; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return NULL; @@ -82,6 +88,8 @@ static bool res_is_user(struct rdma_restrack_entry *res) return container_of(res, struct ib_cq, res)->uobject; case RDMA_RESTRACK_QP: return container_of(res, struct ib_qp, res)->uobject; + case RDMA_RESTRACK_CM_ID: + return !container_of(res, struct rdma_id_private, res)->caller; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return false; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index f015f1b..4764626 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct 
ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(current->nsproxy->net_ns, - ucma_event_handler, ctx, cmd.ps, qp_type); + ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type, NULL); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; @@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); - ret = rdma_accept(ctx->cm_id, &conn_param); + ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); } else - ret = rdma_accept(ctx->cm_id, NULL); + ret = __rdma_accept(ctx->cm_id, NULL, NULL); ucma_put_ctx(ctx); return ret; diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 6538a5c..62caae8 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -157,6 +157,11 @@ struct rdma_cm_id { u8 port_num; }; +struct rdma_cm_id *__rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type, const char *caller); + /** * rdma_create_id - Create an RDMA identifier. * @@ -169,10 +174,9 @@ struct rdma_cm_id { * * The id holds a reference on the network namespace until it is destroyed. */ -struct rdma_cm_id *rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps, - enum ib_qp_type qp_type); +#define rdma_create_id(net, event_handler, context, ps, qp_type) \ + __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ + KBUILD_MODNAME) /** * rdma_destroy_id - Destroys an RDMA identifier. 
@@ -284,6 +288,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, */ int rdma_listen(struct rdma_cm_id *id, int backlog); +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller); + /** * rdma_accept - Called to accept a connection request or response. * @id: Connection identifier associated with the request. @@ -299,7 +306,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, * state of the qp associated with the id is modified to error, such that any * previously posted receive buffers would be flushed. */ -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); +#define rdma_accept(id, conn_param) \ + __rdma_accept((id), (conn_param), KBUILD_MODNAME) /** * rdma_notify - Notifies the RDMA CM of an asynchronous event that has diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 2cdf8dc..bbb1a8f 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -29,6 +29,10 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_QP, /** + * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID) + */ + RDMA_RESTRACK_CM_ID, + /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 4c77e2a..0399aed 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -238,6 +238,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */ + RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */ + RDMA_NLDEV_NUM_OPS }; @@ -350,6 +352,18 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */ + RDMA_NLDEV_ATTR_RES_CM_ID, /* nested table */ + RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, /* nested table */ + /* + * rdma_cm_id port space. 
+ */ + RDMA_NLDEV_ATTR_RES_PS, /* u32 */ + /* + * Source and destination socket addresses + */ + RDMA_NLDEV_ATTR_RES_SRC_ADDR, /* __kernel_sockaddr_storage */ + RDMA_NLDEV_ATTR_RES_DST_ADDR, /* __kernel_sockaddr_storage */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html