On Fri, Nov 30, 2018 at 01:58:03PM -0800, Steve Wise wrote: > Add support for new LINK messages to allow adding and deleting rdma > interfaces. This will be used initially for soft rdma drivers which > instantiate device instances dynamically by the admin specifying a > netdev device to use. The rdma_rxe module will be the first user of > these messages. > > The design is modeled after RTNL_NEWLINK/DELLINK: rdma drivers > register with the rdma core if they provide link add/delete functions. > Each driver registers with a unique "type" string, that is used to > dispatch messages coming from user space. A new RDMA_NLDEV_ATTR > is defined for the "type" string. User mode will pass 3 attributes > in a NEWLINK message: RDMA_NLDEV_ATTR_DEV_NAME for the desired rdma > device name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" > of link being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device > interface to use for this link. The DELLINK message will contain the > RDMA_NLDEV_ATTR_DEV_INDEX of the device to delete. 
> > Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> > Reviewed-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> > --- > drivers/infiniband/core/nldev.c | 139 +++++++++++++++++++++++++++++++++++++++ > include/rdma/ib_verbs.h | 2 + > include/rdma/rdma_netlink.h | 13 ++++ > include/uapi/rdma/rdma_netlink.h | 11 +++- > 4 files changed, 163 insertions(+), 2 deletions(-) > > diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c > index 63cc74483188..6df4c98da365 100644 > --- a/drivers/infiniband/core/nldev.c > +++ b/drivers/infiniband/core/nldev.c > @@ -33,6 +33,7 @@ > #include <linux/module.h> > #include <linux/pid.h> > #include <linux/pid_namespace.h> > +#include <linux/mutex.h> > #include <net/netlink.h> > #include <rdma/rdma_cm.h> > #include <rdma/rdma_netlink.h> > @@ -107,6 +108,8 @@ > [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, > [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, > [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, > + [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, > + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, > }; > > static int put_driver_name_print_type(struct sk_buff *msg, const char *name, > @@ -1103,6 +1106,134 @@ static int nldev_res_get_pd_dumpit(struct sk_buff *skb, > return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); > } > > +static LIST_HEAD(link_ops); > +static DECLARE_RWSEM(link_ops_rwsem); > + > +static const struct rdma_link_ops *link_ops_get(const char *type) > +{ > + const struct rdma_link_ops *ops; > + > + list_for_each_entry(ops, &link_ops, list) { > + if (!strcmp(ops->type, type)) > + goto out; > + } > + ops = NULL; > +out: > + return ops; > +} > + > +void rdma_link_register(struct rdma_link_ops *ops) > +{ > + down_write(&link_ops_rwsem); > + if (link_ops_get(ops->type)) { > + WARN_ONCE("Duplicate rdma_link_ops! 
%s\n", ops->type); > + goto out; > + } > + list_add(&ops->list, &link_ops); > +out: > + up_write(&link_ops_rwsem); > +} > +EXPORT_SYMBOL(rdma_link_register); > + > +void rdma_link_unregister(struct rdma_link_ops *ops) > +{ > + down_write(&link_ops_rwsem); > + list_del(&ops->list); > + up_write(&link_ops_rwsem); > +} > +EXPORT_SYMBOL(rdma_link_unregister); > + > +static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, > + struct netlink_ext_ack *extack) > +{ > + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; > + char ibdev_name[IB_DEVICE_NAME_MAX]; > + const struct rdma_link_ops *ops; > + struct ib_device *device; > + char ndev_name[IFNAMSIZ]; > + char type[IFNAMSIZ]; > + int err; > + > + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, > + nldev_policy, extack); > + if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || > + !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > + return -EINVAL; > + > + if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) || > + nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) || > + nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > sizeof(ndev_name)) > + return -EINVAL; Why is this length check needed? These attribute lengths are supposed to be validated by nldev_policy during the nlmsg_parse() stage. 
> + > + nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], > + sizeof(ibdev_name)); > + if (strchr(ibdev_name, '%')) > + return -EINVAL; > + > + nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); > + nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], > + sizeof(ndev_name)); > + > + down_read(&link_ops_rwsem); > + ops = link_ops_get(type); > +#ifdef CONFIG_MODULES > + if (!ops) { > + up_read(&link_ops_rwsem); > + request_module("rdma-link-%s", type); > + down_read(&link_ops_rwsem); > + ops = link_ops_get(type); > + } > +#endif > + if (ops) { > + device = ops->newlink(ibdev_name, ndev_name); > + if (IS_ERR(device)) > + err = PTR_ERR(device); > + else > + device->link_ops = ops; > + } else { > + err = -ENODEV; > + } > + up_read(&link_ops_rwsem); > + > + return err; > +} > + > +static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, > + struct netlink_ext_ack *extack) > +{ > + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; > + const struct rdma_link_ops *ops; > + struct device *dma_device; > + struct ib_device *device; > + u32 index; > + int err; > + > + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, > + nldev_policy, extack); > + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) > + return -EINVAL; > + > + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); > + device = ib_device_get_by_index(index); > + if (!device) > + return -EINVAL; > + > + ops = device->link_ops; > + > + /* > + * Deref the ib_device before deleting it. Otherwise we > + * deadlock unregistering the device. Hold a ref on the > + * underlying dma_device though to keep the memory around > + * until we're done. > + */ > + dma_device = get_device(device->dma_device); > + ib_device_put(device); > + err = ops ? 
ops->dellink(device) : -ENODEV; > + put_device(dma_device); > + > + return err; > +} > + > static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { > [RDMA_NLDEV_CMD_GET] = { > .doit = nldev_get_doit, > @@ -1112,6 +1243,14 @@ static int nldev_res_get_pd_dumpit(struct sk_buff *skb, > .doit = nldev_set_doit, > .flags = RDMA_NL_ADMIN_PERM, > }, > + [RDMA_NLDEV_CMD_NEWLINK] = { > + .doit = nldev_newlink, > + .flags = RDMA_NL_ADMIN_PERM, > + }, > + [RDMA_NLDEV_CMD_DELLINK] = { > + .doit = nldev_dellink, > + .flags = RDMA_NL_ADMIN_PERM, > + }, > [RDMA_NLDEV_CMD_PORT_GET] = { > .doit = nldev_port_get_doit, > .dump = nldev_port_get_dumpit, > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index ee8a6008e222..7a5ef71e5348 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -2608,6 +2608,8 @@ struct ib_device { > */ > refcount_t refcount; > struct completion unreg_completion; > + > + const struct rdma_link_ops *link_ops; > }; > > struct ib_client { > diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h > index 70218e6b5187..9b50e957cbae 100644 > --- a/include/rdma/rdma_netlink.h > +++ b/include/rdma/rdma_netlink.h > @@ -99,4 +99,17 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, > * Returns true on success or false if no listeners. 
> */ > bool rdma_nl_chk_listeners(unsigned int group); > + > +struct rdma_link_ops { > + struct list_head list; > + const char *type; > + struct ib_device *(*newlink)(const char *ibdev_name, > + const char *ndev_name); > + int (*dellink)(struct ib_device *ibdev); > +}; > + > +void rdma_link_register(struct rdma_link_ops *ops); > +void rdma_link_unregister(struct rdma_link_ops *ops); > + > +#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type) > #endif /* _RDMA_NETLINK_H */ > diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h > index f9c41bf59efc..dfdfc2b608b8 100644 > --- a/include/uapi/rdma/rdma_netlink.h > +++ b/include/uapi/rdma/rdma_netlink.h > @@ -229,9 +229,11 @@ enum rdma_nldev_command { > RDMA_NLDEV_CMD_GET, /* can dump */ > RDMA_NLDEV_CMD_SET, > > - /* 3 - 4 are free to use */ > + RDMA_NLDEV_CMD_NEWLINK, > > - RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ > + RDMA_NLDEV_CMD_DELLINK, > + > + RDMA_NLDEV_CMD_PORT_GET, /* can dump */ > > /* 6 - 8 are free to use */ > > @@ -428,6 +430,11 @@ enum rdma_nldev_attr { > RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ > > /* > + * Identifies the rdma driver. eg: "rxe" or "siw" > + */ > + RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ > + > + /* > * Always the end > */ > RDMA_NLDEV_ATTR_MAX > -- > 1.8.3.1 >
Attachment:
signature.asc
Description: PGP signature