Re: [PATCH v6 3/4] RDMA/Core: add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Dec 06, 2018 at 09:44:11AM -0600, Steve Wise wrote:
> Commenting on my own submission:
> 
> On 12/5/2018 9:14 AM, Steve Wise wrote:
> > Add support for new LINK messages to allow adding and deleting rdma
> > interfaces.  This will be used initially for soft rdma drivers which
> > instantiate device instances dynamically by the admin specifying a
> > netdev device to use.  The rdma_rxe module will be the first user of
> > these messages.
> >
> > The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
> > register with the rdma core if they provide link add/delete functions.
> > Each driver registers with a unique "type" string, that is used to
> > dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR
> > is defined for the "type" string.  User mode will pass 3 attributes
> > in a NEWLINK message: RDMA_NLDEV_ATTR_DEV_NAME for the desired rdma
> > device name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type"
> > of link being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device
> > interface to use for this link.  The DELLINK message will contain the
> > RDMA_NLDEV_ATTR_DEV_INDEX of the device to delete.
> >
> > Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx>
> > Reviewed-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
> >  drivers/infiniband/core/nldev.c  | 134 +++++++++++++++++++++++++++++++++++++++
> >  include/rdma/ib_verbs.h          |   2 +
> >  include/rdma/rdma_netlink.h      |  13 ++++
> >  include/uapi/rdma/rdma_netlink.h |  11 +++-
> >  4 files changed, 158 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> > index 9abbadb9e366..b6d97c592074 100644
> > +++ b/drivers/infiniband/core/nldev.c
> > @@ -33,6 +33,7 @@
> >  #include <linux/module.h>
> >  #include <linux/pid.h>
> >  #include <linux/pid_namespace.h>
> > +#include <linux/mutex.h>
> >  #include <net/netlink.h>
> >  #include <rdma/rdma_cm.h>
> >  #include <rdma/rdma_netlink.h>
> > @@ -107,6 +108,8 @@
> >  	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
> >  	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
> >  	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
> > +	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
> > +				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
> >  };
> >  
> >  static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
> > @@ -1104,6 +1107,129 @@ static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
> >  	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
> >  }
> >  
> > +static LIST_HEAD(link_ops);
> > +static DECLARE_RWSEM(link_ops_rwsem);
> > +
> > +static const struct rdma_link_ops *link_ops_get(const char *type)
> > +{
> > +	const struct rdma_link_ops *ops;
> > +
> > +	list_for_each_entry(ops, &link_ops, list) {
> > +		if (!strcmp(ops->type, type))
> > +			goto out;
> > +	}
> > +	ops = NULL;
> > +out:
> > +	return ops;
> > +}
> > +
> > +void rdma_link_register(struct rdma_link_ops *ops)
> > +{
> > +	down_write(&link_ops_rwsem);
> > +	if (link_ops_get(ops->type)) {
> > +		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
> > +		goto out;
> > +	}
> > +	list_add(&ops->list, &link_ops);
> > +out:
> > +	up_write(&link_ops_rwsem);
> > +}
> > +EXPORT_SYMBOL(rdma_link_register);
> > +
> > +void rdma_link_unregister(struct rdma_link_ops *ops)
> > +{
> > +	down_write(&link_ops_rwsem);
> > +	list_del(&ops->list);
> > +	up_write(&link_ops_rwsem);
> > +}
> > +EXPORT_SYMBOL(rdma_link_unregister);
> > +
> > +static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
> > +			  struct netlink_ext_ack *extack)
> > +{
> > +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> > +	char ibdev_name[IB_DEVICE_NAME_MAX];
> > +	const struct rdma_link_ops *ops;
> > +	struct ib_device *device;
> > +	char ndev_name[IFNAMSIZ];
> > +	char type[IFNAMSIZ];
> > +	int err;
> > +
> > +	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> > +			  nldev_policy, extack);
> > +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
> > +	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
> > +		return -EINVAL;
> > +
> > +	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
> > +		    sizeof(ibdev_name));
> > +	if (strchr(ibdev_name, '%'))
> > +		return -EINVAL;
> > +
> > +	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
> > +	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
> > +		    sizeof(ndev_name));
> > +
> > +	down_read(&link_ops_rwsem);
> > +	ops = link_ops_get(type);
> > +#ifdef CONFIG_MODULES
> > +	if (!ops) {
> > +		up_read(&link_ops_rwsem);
> > +		request_module("rdma-link-%s", type);
> > +		down_read(&link_ops_rwsem);
> > +		ops = link_ops_get(type);
> > +	}
> > +#endif
> > +	if (ops) {
> > +		device = ops->newlink(ibdev_name, ndev_name);
> > +		if (IS_ERR(device))
> > +			err = PTR_ERR(device);
> > +		else
> > +			device->link_ops = ops;
> > +	} else {
> > +		err = -ENODEV;
> > +	}
> > +	up_read(&link_ops_rwsem);
> > +
> > +	return err;
> > +}
> > +
> > +static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
> > +			  struct netlink_ext_ack *extack)
> > +{
> > +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> > +	const struct rdma_link_ops *ops;
> > +	struct device *dma_device;
> > +	struct ib_device *device;
> > +	u32 index;
> > +	int err;
> > +
> > +	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> > +			  nldev_policy, extack);
> > +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> > +		return -EINVAL;
> > +
> > +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> > +	device = ib_device_get_by_index(index);
> > +	if (!device)
> > +		return -EINVAL;
> > +
> > +	ops = device->link_ops;
> > +
> > +	/*
> > +	 * Deref the ib_device before deleting it.  Otherwise we
> > +	 * deadlock unregistering the device.  Hold a ref on the
> > +	 * underlying dma_device though to keep the memory around
> > +	 * until we're done.
> > +	 */
> > +	dma_device = get_device(device->dma_device);
> > +	ib_device_put(device);
> > +	err = ops ? ops->dellink(device) : -ENODEV;
> > +	put_device(dma_device);

What is the weirndness with dma_device? It should just be device

> There is still a problem with the above code.  If thread 1 is running
> this code and gets up to just after ib_device_put() but before calling
> ops->dellink, and then thread2 runs the rmmod code of rxe, thread 2 can
> remove the device and complete the module unload.

Yes.. You can't use a lot of  the ib device outside the
ib_device_get_by_index/ib_device_put critical region.

The driver, or perhaps the core code, has to be the one to unput it,
and the unput has to be done under proper locking to resolve the race.

Jason



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux