>-----Original Message----- >From: linux-rdma-owner@xxxxxxxxxxxxxxx [mailto:linux-rdma- >owner@xxxxxxxxxxxxxxx] On Behalf Of Steve Wise >Sent: Monday, November 12, 2018 11:26 AM >To: dledford@xxxxxxxxxx; jgg@xxxxxxxxxxxx >Cc: linux-rdma@xxxxxxxxxxxxxxx; BMT@xxxxxxxxxxxxxx >Subject: [PATCH v2 1/2] RDMA/Core: add >RDMA_NLDEV_CMD_NEWLINK/DELLINK support > >Add support for new LINK messages to allow adding and deleting rdma >interfaces. This will be used initially for soft rdma drivers which >instantiate device instances dynamically by the admin specifying a >netdev device to use. The rdma_rxe module will be the first user of >these messages. > >The design is modeled after RTNL_NEWLINK/DELLINK: rdma drivers >register with the rdma core if they provide link add/delete functions. >Each driver registers with a unique "type" string, that is used to >dispatch messages coming from user space. A new RDMA_NLDEV_ATTR is >defined for the "type" string. User mode will pass 3 attributes in a >NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired rdma >device >name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link >being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device >interface >to use for this link. The DELLINK message will contain the IBDEV_NAME >and LINK_TYPE attributes. > >Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> >Reviewed-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> >--- > drivers/infiniband/core/nldev.c | 136 >+++++++++++++++++++++++++++++++++++++++ > include/rdma/rdma_netlink.h | 12 ++++ > include/uapi/rdma/rdma_netlink.h | 9 ++- > 3 files changed, 156 insertions(+), 1 deletion(-) > >diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c >index 573399e3ccc1..926638cd52bc 100644 >--- a/drivers/infiniband/core/nldev.c >+++ b/drivers/infiniband/core/nldev.c >@@ -33,6 +33,7 @@ > #include <linux/module.h> > #include <linux/pid.h> > #include <linux/pid_namespace.h> >+#include <linux/mutex.h> > #include <net/netlink.h> > #include <rdma/rdma_cm.h> > #include <rdma/rdma_netlink.h> >@@ -107,6 +108,8 @@ > [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, > [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, > [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, >+ [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = >NLA_NUL_STRING, >+ .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN >}, > }; > > static int put_driver_name_print_type(struct sk_buff *msg, const char >*name, >@@ -1103,6 +1106,131 @@ static int nldev_res_get_pd_dumpit(struct >sk_buff *skb, > return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); > } > >+static LIST_HEAD(link_ops); >+static DEFINE_MUTEX(link_ops_mutex); >+ >+static const struct rdma_link_ops *link_ops_get(const char *type) >+{ _get() usually seems like a reference counted mechanism. Would this be better named as _ops_find()? >+ const struct rdma_link_ops *ops; >+ >+ mutex_lock(&link_ops_mutex); >+ list_for_each_entry(ops, &link_ops, list) { >+ if (!strcmp(ops->type, type)) >+ goto out; >+ } >+ ops = NULL; >+out: >+ mutex_unlock(&link_ops_mutex); >+ return ops; >+} >+ >+void rdma_link_register(struct rdma_link_ops *ops) >+{ >+ if (link_ops_get(ops->type)) { >+ WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type); >+ return; >+ } >+ mutex_lock(&link_ops_mutex); >+ list_add(&ops->list, &link_ops); >+ mutex_unlock(&link_ops_mutex); >+} >+EXPORT_SYMBOL(rdma_link_register); >+ >+void rdma_link_unregister(struct rdma_link_ops *ops) >+{ >+ mutex_lock(&link_ops_mutex); >+ list_del(&ops->list); >+ mutex_unlock(&link_ops_mutex); >+} >+EXPORT_SYMBOL(rdma_link_unregister); Will there be any issues with a link being unregistered while in use? >+ >+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, >+ struct netlink_ext_ack *extack) >+{ >+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; >+ char ibdev_name[IB_DEVICE_NAME_MAX]; >+ const struct rdma_link_ops *ops; >+ char ndev_name[IFNAMSIZ]; >+ char type[IFNAMSIZ]; >+ int err; >+ >+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, >+ nldev_policy, extack); >+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || >+ !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || >!tb[RDMA_NLDEV_ATTR_NDEV_NAME]) { >+ err = -EINVAL; You are losing the information of nlmsg_parse() failing, is that ok? >+ goto err_out; Is err_out really necessary? Maybe just return? >+ } >+ if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > >sizeof(ibdev_name) || >+ nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) || >+ nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > >sizeof(ndev_name)) { >+ err = -EINVAL; >+ goto err_out; >+ } >+ nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], >+ sizeof(ibdev_name)); >+ if (strchr(ibdev_name, '%')) { >+ err = -EINVAL; >+ goto err_out; >+ } >+ nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); >+ nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], >+ sizeof(ndev_name)); >+ >+ ops = link_ops_get(type); >+#ifdef CONFIG_MODULES >+ if (!ops) { >+ request_module("rdma-link-%s", type); >+ ops = link_ops_get(type); >+ } >+#endif >+ if (!ops) { >+ err = -ENODEV; >+ goto err_out; >+ } >+ >+ err = ops->newlink(ibdev_name, ndev_name); >+err_out: >+ return err; >+} >+ >+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, >+ struct netlink_ext_ack *extack) >+{ >+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; >+ char ibdev_name[IB_DEVICE_NAME_MAX]; >+ const struct rdma_link_ops *ops; >+ char type[IFNAMSIZ]; >+ int err; >+ >+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, >+ nldev_policy, extack); >+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || >+ !tb[RDMA_NLDEV_ATTR_LINK_TYPE]) { >+ err = -EINVAL; >+ goto err_out; >+ } >+ >+ if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > >sizeof(ibdev_name) || >+ nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type)) { >+ err = -EINVAL; >+ goto err_out; >+ } >+ nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], >+ sizeof(ibdev_name)); >+ nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); >+ >+ ops = link_ops_get(type); >+ if (!ops) { >+ err = -ENODEV; >+ goto err_out; >+ } >+ >+ err = ops->dellink(ibdev_name); >+err_out: >+ return err; >+} >+ > static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = >{ > [RDMA_NLDEV_CMD_GET] = { > .doit = nldev_get_doit, >@@ -1112,6 +1240,14 @@ static int nldev_res_get_pd_dumpit(struct sk_buff >*skb, > .doit = nldev_set_doit, > .flags = RDMA_NL_ADMIN_PERM, > }, >+ [RDMA_NLDEV_CMD_NEWLINK] = { >+ .doit = nldev_newlink, >+ .flags = RDMA_NL_ADMIN_PERM, >+ }, >+ [RDMA_NLDEV_CMD_DELLINK] = { >+ .doit = nldev_dellink, >+ .flags = RDMA_NL_ADMIN_PERM, >+ }, > [RDMA_NLDEV_CMD_PORT_GET] = { > .doit = nldev_port_get_doit, > .dump = nldev_port_get_dumpit, >diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h >index 70218e6b5187..42daca1589bd 100644 >--- a/include/rdma/rdma_netlink.h >+++ b/include/rdma/rdma_netlink.h >@@ -99,4 +99,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr >*nlh, > * Returns true on success or false if no listeners. > */ > bool rdma_nl_chk_listeners(unsigned int group); >+ >+struct rdma_link_ops { >+ struct list_head list; >+ const char *type; >+ int (*newlink)(const char *ibdev_name, const char *ndev_name); >+ int (*dellink)(const char *ibdev_name); >+}; >+ >+void rdma_link_register(struct rdma_link_ops *ops); >+void rdma_link_unregister(struct rdma_link_ops *ops); >+ >+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" >type) > #endif /* _RDMA_NETLINK_H */ >diff --git a/include/uapi/rdma/rdma_netlink.h >b/include/uapi/rdma/rdma_netlink.h >index f9c41bf59efc..7feb087ab6fd 100644 >--- a/include/uapi/rdma/rdma_netlink.h >+++ b/include/uapi/rdma/rdma_netlink.h >@@ -229,7 +229,9 @@ enum rdma_nldev_command { > RDMA_NLDEV_CMD_GET, /* can dump */ > RDMA_NLDEV_CMD_SET, > >- /* 3 - 4 are free to use */ >+ RDMA_NLDEV_CMD_NEWLINK, >+ >+ RDMA_NLDEV_CMD_DELLINK, > > RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ Minor nit... Should the "= 5 " be cleaned up? This may be off topic, but is this going the general mechanism for getting stuff to the specific driver? 1) I.e. create a new API interface with a call back 2) Add the plumbing to the underlying driver Is there a plan for adding other things? Do you happen to have any thoughts on things like driver specific config information? Thanks, Mike >@@ -428,6 +430,11 @@ enum rdma_nldev_attr { > RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ > > /* >+ * Identifies the rdma driver. eg: "rxe" or "siw" >+ */ >+ RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ >+ >+ /* > * Always the end > */ > RDMA_NLDEV_ATTR_MAX >-- >1.8.3.1