On Wed, Sep 26, 2018 at 01:13:45PM -0700, Steve Wise wrote: > Add support for new LINK messages to allow adding and deleting rdma > interfaces. This will be used initially for soft rdma drivers which > instantiate device instances dynamically by the admin specifying a netdev > device to use. The rdma_rxe module will be the first user of these > messages. > > The design is modeled after RTNL_NEWLINK/DELLINK: rdma drivers > register with the rdma core if they provide link add/delete functions. > Each driver registers with a unique "type" string, that is used to > dispatch messages coming from user space. A new RDMA_NLDEV_ATTR is > defined for the "type" string. User mode will pass 3 attributes in a > NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired rdma device > name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link > being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device interface > to use for this link. The DELLINK message will contain the IBDEV_NAME > and LINK_TYPE attributes. 
> > Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> > drivers/infiniband/core/nldev.c | 128 +++++++++++++++++++++++++++++++++++++++ > include/rdma/rdma_netlink.h | 13 ++++ > include/uapi/rdma/rdma_netlink.h | 9 ++- > 3 files changed, 149 insertions(+), 1 deletion(-) > > diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c > index e841725a0ee0..a5982cda6956 100644 > +++ b/drivers/infiniband/core/nldev.c > @@ -33,6 +33,7 @@ > #include <linux/module.h> > #include <linux/pid.h> > #include <linux/pid_namespace.h> > +#include <linux/mutex.h> > #include <net/netlink.h> > #include <rdma/rdma_cm.h> > #include <rdma/rdma_netlink.h> > @@ -107,6 +108,8 @@ > [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, > [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, > [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, > + [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, > + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, > }; > > static int put_driver_name_print_type(struct sk_buff *msg, const char *name, > @@ -1102,6 +1105,123 @@ static int nldev_res_get_pd_dumpit(struct sk_buff *skb, > return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); > } > > +static LIST_HEAD(link_ops); > +static DEFINE_MUTEX(link_ops_mutex); > + > +static const struct rdma_link_ops *link_ops_get(const char *type) > +{ > + const struct rdma_link_ops *ops; > + > + mutex_lock(&link_ops_mutex); > + list_for_each_entry(ops, &link_ops, list) { > + if (!strcmp(ops->type, type)) > + goto out; > + } > + ops = NULL; > +out: > + mutex_unlock(&link_ops_mutex); > + return ops; > +} > + > +void rdma_link_register(struct rdma_link_ops *ops) > +{ > + if (link_ops_get(ops->type)) { > + WARN_ONCE("Duplicate rdma_link_ops! 
%s\n", ops->type); > + return; > + } > + mutex_lock(&link_ops_mutex); > + list_add(&ops->list, &link_ops); > + mutex_unlock(&link_ops_mutex); > +} > +EXPORT_SYMBOL(rdma_link_register); > + > +void rdma_link_unregister(struct rdma_link_ops *ops) > +{ > + mutex_lock(&link_ops_mutex); > + list_del(&ops->list); > + mutex_unlock(&link_ops_mutex); > +} > +EXPORT_SYMBOL(rdma_link_unregister); > + > +static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, > + struct netlink_ext_ack *extack) > +{ > + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; > + char ibdev_name[IB_DEVICE_NAME_MAX]; > + const struct rdma_link_ops *ops; > + char ndev_name[IFNAMSIZ]; > + char type[IFNAMSIZ]; > + int err; > + > +#ifdef CONFIG_MODULES > +replay: > +#endif > + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, > + nldev_policy, extack); > + if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || > + !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) { > + err = -EINVAL; > + goto err_out; > + } > + > + nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], > + sizeof(ibdev_name)); > + nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); > + nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], > + sizeof(ndev_name)); These nla_strlcpy() calls should fail the syscall if the name is longer than our buffer, rather than truncating it and wrongly continuing. 
> + pr_debug("ibdev_name |%s| type |%s| ndev_name |%s|\n", ibdev_name, > + type, ndev_name); > + > + ops = link_ops_get(type); > + if (!ops) { > +#ifdef CONFIG_MODULES > + request_module("rdma-link-%s", type); > + ops = link_ops_get(type); > + if (ops) > + goto replay; > +#endif > + err = -ENODEV; > + goto err_out; > + } > + > + err = ops->newlink(ibdev_name, ndev_name); Need to check that there is no '%' in the ibdev_name, or ib_register will do something wrong. > diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h > index c369703fcd69..9bb8e1351a30 100644 > +++ b/include/rdma/rdma_netlink.h > @@ -99,4 +99,17 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, > * Returns 0 on success or a negative for no listeners. > */ > int rdma_nl_chk_listeners(unsigned int group); > + > +struct rdma_link_ops { > + struct list_head list; > + const char *type; > + int (*newlink)(char *ibdev_name, char *ndev_name); > + int (*dellink)(char *ibdev_name); These name arguments should be const char *. Jason