>-----Original Message----- >From: linux-rdma-owner@xxxxxxxxxxxxxxx [mailto:linux-rdma- >owner@xxxxxxxxxxxxxxx] On Behalf Of Steve Wise >Sent: Wednesday, December 5, 2018 10:14 AM >To: dledford@xxxxxxxxxx; jgg@xxxxxxxxxxxx >Cc: linux-rdma@xxxxxxxxxxxxxxx; BMT@xxxxxxxxxxxxxx; leon@xxxxxxxxxx; >markb@xxxxxxxxxxxx; yanjun.zhu@xxxxxxxxxx >Subject: [PATCH v6 3/4] RDMA/Core: add >RDMA_NLDEV_CMD_NEWLINK/DELLINK support > >Add support for new LINK messages to allow adding and deleting rdma >interfaces. This will be used initially for soft rdma drivers which >instantiate device instances dynamically by the admin specifying a >netdev device to use. The rdma_rxe module will be the first user of >these messages. > >The design is modeled after RTNL_NEWLINK/DELLINK: rdma drivers >register with the rdma core if they provide link add/delete functions. >Each driver registers with a unique "type" string, that is used to >dispatch messages coming from user space. A new RDMA_NLDEV_ATTR >is defined for the "type" string. User mode will pass 3 attributes >in a NEWLINK message: RDMA_NLDEV_ATTR_DEV_NAME for the desired >rdma >device name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" >of link being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the >net_device >interface to use for this link. The DELLINK message will contain the >RDMA_NLDEV_ATTR_DEV_INDEX of the device to delete. > >Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> >Reviewed-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> >--- > drivers/infiniband/core/nldev.c | 134 >+++++++++++++++++++++++++++++++++++++++ > include/rdma/ib_verbs.h | 2 + > include/rdma/rdma_netlink.h | 13 ++++ > include/uapi/rdma/rdma_netlink.h | 11 +++- > 4 files changed, 158 insertions(+), 2 deletions(-) > >diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c >index 9abbadb9e366..b6d97c592074 100644 >--- a/drivers/infiniband/core/nldev.c >+++ b/drivers/infiniband/core/nldev.c >@@ -33,6 +33,7 @@ > #include <linux/module.h> > #include <linux/pid.h> > #include <linux/pid_namespace.h> >+#include <linux/mutex.h> > #include <net/netlink.h> > #include <rdma/rdma_cm.h> > #include <rdma/rdma_netlink.h> >@@ -107,6 +108,8 @@ > [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, > [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, > [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, >+ [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = >NLA_NUL_STRING, >+ .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN >}, > }; > > static int put_driver_name_print_type(struct sk_buff *msg, const char >*name, >@@ -1104,6 +1107,129 @@ static int nldev_res_get_pd_dumpit(struct >sk_buff *skb, > return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); > } > >+static LIST_HEAD(link_ops); >+static DECLARE_RWSEM(link_ops_rwsem); >+ >+static const struct rdma_link_ops *link_ops_get(const char *type) >+{ >+ const struct rdma_link_ops *ops; >+ >+ list_for_each_entry(ops, &link_ops, list) { >+ if (!strcmp(ops->type, type)) >+ goto out; >+ } >+ ops = NULL; >+out: >+ return ops; >+} >+ >+void rdma_link_register(struct rdma_link_ops *ops) >+{ >+ down_write(&link_ops_rwsem); >+ if (link_ops_get(ops->type)) { >+ WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type); >+ goto out; >+ } >+ list_add(&ops->list, &link_ops); >+out: >+ up_write(&link_ops_rwsem); >+} >+EXPORT_SYMBOL(rdma_link_register); >+ >+void rdma_link_unregister(struct rdma_link_ops *ops) >+{ >+ down_write(&link_ops_rwsem); >+ list_del(&ops->list); >+ up_write(&link_ops_rwsem); >+} >+EXPORT_SYMBOL(rdma_link_unregister); >+ >+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, >+ struct netlink_ext_ack *extack) >+{ >+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; >+ char ibdev_name[IB_DEVICE_NAME_MAX]; >+ const struct rdma_link_ops *ops; >+ struct ib_device *device; >+ char ndev_name[IFNAMSIZ]; >+ char type[IFNAMSIZ]; >+ int err; >+ >+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, >+ nldev_policy, extack); >+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || >+ !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || >!tb[RDMA_NLDEV_ATTR_NDEV_NAME]) >+ return -EINVAL; >+ >+ nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], >+ sizeof(ibdev_name)); >+ if (strchr(ibdev_name, '%')) >+ return -EINVAL; >+ >+ nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); >+ nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], >+ sizeof(ndev_name)); >+ >+ down_read(&link_ops_rwsem); >+ ops = link_ops_get(type); >+#ifdef CONFIG_MODULES >+ if (!ops) { >+ up_read(&link_ops_rwsem); >+ request_module("rdma-link-%s", type); >+ down_read(&link_ops_rwsem); >+ ops = link_ops_get(type); >+ } >+#endif >+ if (ops) { >+ device = ops->newlink(ibdev_name, ndev_name); >+ if (IS_ERR(device)) >+ err = PTR_ERR(device); >+ else >+ device->link_ops = ops; >+ } else { >+ err = -ENODEV; >+ } >+ up_read(&link_ops_rwsem); >+ >+ return err; >+} The rwsem makes a lot more sense (than the mutex). Thanks! If you would like: Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx> Mike >+ >+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, >+ struct netlink_ext_ack *extack) >+{ >+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; >+ const struct rdma_link_ops *ops; >+ struct device *dma_device; >+ struct ib_device *device; >+ u32 index; >+ int err; >+ >+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, >+ nldev_policy, extack); >+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) >+ return -EINVAL; >+ >+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); >+ device = ib_device_get_by_index(index); >+ if (!device) >+ return -EINVAL; >+ >+ ops = device->link_ops; >+ >+ /* >+ * Deref the ib_device before deleting it. Otherwise we >+ * deadlock unregistering the device. Hold a ref on the >+ * underlying dma_device though to keep the memory around >+ * until we're done. >+ */ >+ dma_device = get_device(device->dma_device); >+ ib_device_put(device); >+ err = ops ? ops->dellink(device) : -ENODEV; >+ put_device(dma_device); >+ >+ return err; >+} >+ > static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = >{ > [RDMA_NLDEV_CMD_GET] = { > .doit = nldev_get_doit, >@@ -1113,6 +1239,14 @@ static int nldev_res_get_pd_dumpit(struct sk_buff >*skb, > .doit = nldev_set_doit, > .flags = RDMA_NL_ADMIN_PERM, > }, >+ [RDMA_NLDEV_CMD_NEWLINK] = { >+ .doit = nldev_newlink, >+ .flags = RDMA_NL_ADMIN_PERM, >+ }, >+ [RDMA_NLDEV_CMD_DELLINK] = { >+ .doit = nldev_dellink, >+ .flags = RDMA_NL_ADMIN_PERM, >+ }, > [RDMA_NLDEV_CMD_PORT_GET] = { > .doit = nldev_port_get_doit, > .dump = nldev_port_get_dumpit, >diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h >index 85021451eee0..237d87c52e3a 100644 >--- a/include/rdma/ib_verbs.h >+++ b/include/rdma/ib_verbs.h >@@ -2612,6 +2612,8 @@ struct ib_device { > */ > refcount_t refcount; > struct completion unreg_completion; >+ >+ const struct rdma_link_ops *link_ops; > }; > > struct ib_client { >diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h >index 70218e6b5187..9b50e957cbae 100644 >--- a/include/rdma/rdma_netlink.h >+++ b/include/rdma/rdma_netlink.h >@@ -99,4 +99,17 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr >*nlh, > * Returns true on success or false if no listeners. > */ > bool rdma_nl_chk_listeners(unsigned int group); >+ >+struct rdma_link_ops { >+ struct list_head list; >+ const char *type; >+ struct ib_device *(*newlink)(const char *ibdev_name, >+ const char *ndev_name); >+ int (*dellink)(struct ib_device *ibdev); >+}; >+ >+void rdma_link_register(struct rdma_link_ops *ops); >+void rdma_link_unregister(struct rdma_link_ops *ops); >+ >+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" >type) > #endif /* _RDMA_NETLINK_H */ >diff --git a/include/uapi/rdma/rdma_netlink.h >b/include/uapi/rdma/rdma_netlink.h >index f9c41bf59efc..dfdfc2b608b8 100644 >--- a/include/uapi/rdma/rdma_netlink.h >+++ b/include/uapi/rdma/rdma_netlink.h >@@ -229,9 +229,11 @@ enum rdma_nldev_command { > RDMA_NLDEV_CMD_GET, /* can dump */ > RDMA_NLDEV_CMD_SET, > >- /* 3 - 4 are free to use */ >+ RDMA_NLDEV_CMD_NEWLINK, > >- RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ >+ RDMA_NLDEV_CMD_DELLINK, >+ >+ RDMA_NLDEV_CMD_PORT_GET, /* can dump */ > > /* 6 - 8 are free to use */ > >@@ -428,6 +430,11 @@ enum rdma_nldev_attr { > RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ > > /* >+ * Identifies the rdma driver. eg: "rxe" or "siw" >+ */ >+ RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ >+ >+ /* > * Always the end > */ > RDMA_NLDEV_ATTR_MAX >-- >1.8.3.1