>-----Original Message----- >From: linux-rdma-owner@xxxxxxxxxxxxxxx [mailto:linux-rdma- >owner@xxxxxxxxxxxxxxx] On Behalf Of Steve Wise >Sent: Tuesday, November 27, 2018 10:37 AM >To: dledford@xxxxxxxxxx; jgg@xxxxxxxxxxxx >Cc: linux-rdma@xxxxxxxxxxxxxxx; BMT@xxxxxxxxxxxxxx >Subject: [PATCH v3 1/2] RDMA/Core: add >RDMA_NLDEV_CMD_NEWLINK/DELLINK support > >Add support for new LINK messages to allow adding and deleting rdma >interfaces. This will be used initially for soft rdma drivers which >instantiate device instances dynamically by the admin specifying a >netdev device to use. The rdma_rxe module will be the first user of >these messages. > >The design is modeled after RTNL_NEWLINK/DELLINK: rdma drivers >register with the rdma core if they provide link add/delete functions. >Each driver registers with a unique "type" string, that is used to >dispatch messages coming from user space. A new RDMA_NLDEV_ATTR is >defined for the "type" string. User mode will pass 3 attributes in a >NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired rdma >device >name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link >being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device interface >to use for this link. The DELLINK message will contain the IBDEV_NAME >and LINK_TYPE attributes. > >Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> >Reviewed-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> >--- > drivers/infiniband/core/nldev.c | 131 >+++++++++++++++++++++++++++++++++++++++ > include/rdma/rdma_netlink.h | 12 ++++ > include/uapi/rdma/rdma_netlink.h | 11 +++- > 3 files changed, 152 insertions(+), 2 deletions(-) > >diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c >index 63cc74483188..04d8af0ccb25 100644 >--- a/drivers/infiniband/core/nldev.c >+++ b/drivers/infiniband/core/nldev.c >@@ -33,6 +33,7 @@ > #include <linux/module.h> > #include <linux/pid.h> > #include <linux/pid_namespace.h> >+#include <linux/mutex.h> > #include <net/netlink.h> > #include <rdma/rdma_cm.h> > #include <rdma/rdma_netlink.h> >@@ -107,6 +108,8 @@ > [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, > [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, > [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, >+ [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = >NLA_NUL_STRING, >+ .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, > }; > > static int put_driver_name_print_type(struct sk_buff *msg, const char *name, >@@ -1103,6 +1106,126 @@ static int nldev_res_get_pd_dumpit(struct sk_buff >*skb, > return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); > } > >+static LIST_HEAD(link_ops); >+static DEFINE_MUTEX(link_ops_mutex); >+ >+static const struct rdma_link_ops *link_ops_get(const char *type) >+{ >+ const struct rdma_link_ops *ops; >+ >+ list_for_each_entry(ops, &link_ops, list) { >+ if (!strcmp(ops->type, type)) >+ goto out; >+ } >+ ops = NULL; >+out: >+ return ops; >+} >+ >+void rdma_link_register(struct rdma_link_ops *ops) >+{ >+ mutex_lock(&link_ops_mutex); >+ if (link_ops_get(ops->type)) { >+ WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type); >+ goto out; >+ } >+ list_add(&ops->list, &link_ops); >+out: >+ mutex_unlock(&link_ops_mutex); >+} >+EXPORT_SYMBOL(rdma_link_register); >+ >+void rdma_link_unregister(struct rdma_link_ops *ops) >+{ >+ mutex_lock(&link_ops_mutex); >+ list_del(&ops->list); >+ mutex_unlock(&link_ops_mutex); >+} >+EXPORT_SYMBOL(rdma_link_unregister); >+ >+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, >+ struct netlink_ext_ack *extack) >+{ >+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; >+ char ibdev_name[IB_DEVICE_NAME_MAX]; >+ const struct rdma_link_ops *ops; >+ char ndev_name[IFNAMSIZ]; >+ char type[IFNAMSIZ]; >+ int err; >+ >+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, >+ nldev_policy, extack); >+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || >+ !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || >!tb[RDMA_NLDEV_ATTR_NDEV_NAME]) >+ return -EINVAL; >+ >+ if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) || >+ nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) || >+ nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > sizeof(ndev_name)) >+ return -EINVAL; >+ >+ nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], >+ sizeof(ibdev_name)); >+ if (strchr(ibdev_name, '%')) >+ return -EINVAL; >+ >+ nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); >+ nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], >+ sizeof(ndev_name)); >+ >+ mutex_lock(&link_ops_mutex); >+ ops = link_ops_get(type); >+#ifdef CONFIG_MODULES >+ if (!ops) { >+ mutex_unlock(&link_ops_mutex); >+ request_module("rdma-link-%s", type); >+ mutex_lock(&link_ops_mutex); >+ ops = link_ops_get(type); >+ } >+#endif >+ if (ops) >+ err = ops->newlink(ibdev_name, ndev_name); >+ else >+ err = -ENODEV; >+ mutex_unlock(&link_ops_mutex); Hi Steve, Should the mutex be held while calling the callback? If the newlink or dellink callbacks need to sleep (probably dellink specifically), I think holding the mutex might not be a good idea. Mike >+ return err; >+} >+ >+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, >+ struct netlink_ext_ack *extack) >+{ >+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; >+ char ibdev_name[IB_DEVICE_NAME_MAX]; >+ const struct rdma_link_ops *ops; >+ char type[IFNAMSIZ]; >+ int err; >+ >+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, >+ nldev_policy, extack); >+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || >+ !tb[RDMA_NLDEV_ATTR_LINK_TYPE]) >+ return -EINVAL; >+ >+ if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) || >+ nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type)) >+ return -EINVAL; >+ >+ nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], >+ sizeof(ibdev_name)); >+ nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); >+ >+ mutex_lock(&link_ops_mutex); >+ ops = link_ops_get(type); >+ if (ops) >+ err = ops->dellink(ibdev_name); >+ else >+ err = -ENODEV; >+ mutex_unlock(&link_ops_mutex); >+ >+ return err; >+} >+ > static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { > [RDMA_NLDEV_CMD_GET] = { > .doit = nldev_get_doit, >@@ -1112,6 +1235,14 @@ static int nldev_res_get_pd_dumpit(struct sk_buff >*skb, > .doit = nldev_set_doit, > .flags = RDMA_NL_ADMIN_PERM, > }, >+ [RDMA_NLDEV_CMD_NEWLINK] = { >+ .doit = nldev_newlink, >+ .flags = RDMA_NL_ADMIN_PERM, >+ }, >+ [RDMA_NLDEV_CMD_DELLINK] = { >+ .doit = nldev_dellink, >+ .flags = RDMA_NL_ADMIN_PERM, >+ }, > [RDMA_NLDEV_CMD_PORT_GET] = { > .doit = nldev_port_get_doit, > .dump = nldev_port_get_dumpit, >diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h >index 70218e6b5187..42daca1589bd 100644 >--- a/include/rdma/rdma_netlink.h >+++ b/include/rdma/rdma_netlink.h >@@ -99,4 +99,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, > * Returns true on success or false if no listeners. > */ > bool rdma_nl_chk_listeners(unsigned int group); >+ >+struct rdma_link_ops { >+ struct list_head list; >+ const char *type; >+ int (*newlink)(const char *ibdev_name, const char *ndev_name); >+ int (*dellink)(const char *ibdev_name); >+}; >+ >+void rdma_link_register(struct rdma_link_ops *ops); >+void rdma_link_unregister(struct rdma_link_ops *ops); >+ >+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type) > #endif /* _RDMA_NETLINK_H */ >diff --git a/include/uapi/rdma/rdma_netlink.h >b/include/uapi/rdma/rdma_netlink.h >index f9c41bf59efc..dfdfc2b608b8 100644 >--- a/include/uapi/rdma/rdma_netlink.h >+++ b/include/uapi/rdma/rdma_netlink.h >@@ -229,9 +229,11 @@ enum rdma_nldev_command { > RDMA_NLDEV_CMD_GET, /* can dump */ > RDMA_NLDEV_CMD_SET, > >- /* 3 - 4 are free to use */ >+ RDMA_NLDEV_CMD_NEWLINK, > >- RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ >+ RDMA_NLDEV_CMD_DELLINK, >+ >+ RDMA_NLDEV_CMD_PORT_GET, /* can dump */ > > /* 6 - 8 are free to use */ > >@@ -428,6 +430,11 @@ enum rdma_nldev_attr { > RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ > > /* >+ * Identifies the rdma driver. eg: "rxe" or "siw" >+ */ >+ RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ >+ >+ /* > * Always the end > */ > RDMA_NLDEV_ATTR_MAX >-- >1.8.3.1