Re: [PATCH rdma-next] RDMA/core: Sync unregistration with netlink commands

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Nov 15, 2018 at 01:43:34PM +0200, Gal Pressman wrote:
> On 15-Nov-18 04:05, Leon Romanovsky wrote:
> > From: Parav Pandit <parav@xxxxxxxxxxxx>
> >
> > Currently when rdma device is getting removed, get resource info can
> > race with device removal example below.
> >
> >       CPU-0                                  CPU-1
> >     --------                               --------
> >     rdma_nl_rcv_msg()
> >        nldev_res_get_cq_dumpit()
> >           mutex_lock(device_lock);
> >           get device reference
> >           mutex_unlock(device_lock);        [..]
> >                                             ib_unregister_device()
> >                                             /* Valid reference to
> >                                              * device->dev exists.
> >                                              */
> >                                              ib_dealloc_device()
> >
> >           [..]
> >           provider->fill_res_entry();
> >
> > Even though device object is not freed, fill_res_entry() can get called
> > on device which doesn't from provider driver side.
> > Kernel core device reference count is not sufficient.
> >
> > Similar race can occur with device renaming and device removal, where
> > device_rename() tries to rename a unregistered device. While this is fine
> > for devices of a class which are not net namespace aware, but it is
> > incorrect for net namespace aware class coming in subsequent series.
> > If a class is net namespace aware, than below [1] call trace is observed
> > in above situation.
> >
> > Therefore, to avoid the such race, keep a reference count and let device
> > unregistration wait until all netlink users drop the reference.
> >
> > [1] Call trace:
> > kernfs: ns required in 'infiniband' for 'mlx5_0'
> > WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
> > libahci i2c_core mlxfw libata dca [last unloaded: devlink]
> > RIP: 0010:kernfs_find_ns+0x104/0x120
> > Call Trace:
> > kernfs_find_and_get_ns+0x2e/0x50
> > sysfs_rename_link_ns+0x40/0xb0
> > device_rename+0xb2/0xf0
> > ib_device_rename+0xb3/0x100 [ib_core]
> > nldev_set_doit+0x165/0x190 [ib_core]
> > rdma_nl_rcv_msg+0x249/0x250 [ib_core]
> > ? netlink_deliver_tap+0x8f/0x3e0
> > rdma_nl_rcv+0xd6/0x120 [ib_core]
> > netlink_unicast+0x17c/0x230
> > netlink_sendmsg+0x2f0/0x3e0
> > sock_sendmsg+0x30/0x40
> > __sys_sendto+0xdc/0x160
> >
> > Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
> > Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
> > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
> > ---
> >  drivers/infiniband/core/core_priv.h |  1 +
> >  drivers/infiniband/core/device.c    | 27 +++++++++++++++++++++++----
> >  drivers/infiniband/core/nldev.c     | 20 ++++++++++----------
> >  include/rdma/ib_verbs.h             |  8 +++++++-
> >  4 files changed, 41 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
> > index 0b0ba44e773d..0da0a26c02b9 100644
> > --- a/drivers/infiniband/core/core_priv.h
> > +++ b/drivers/infiniband/core/core_priv.h
> > @@ -269,6 +269,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
> >  #endif
> >
> >  struct ib_device *ib_device_get_by_index(u32 ifindex);
> > +void ib_device_put(struct ib_device *device);
> >  /* RDMA device netlink */
> >  void nldev_init(void);
> >  void nldev_exit(void);
> > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> > index 1a783724fcba..842853b16941 100644
> > --- a/drivers/infiniband/core/device.c
> > +++ b/drivers/infiniband/core/device.c
> > @@ -145,7 +145,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
> >  }
> >
> >  /*
> > - * Caller is responsible to return refrerence count by calling put_device()
> > + * Caller must perform ib_device_put to return device refrerence count when
>
> Typo in reference.

Thanks for the feedback, sending v1.

>
> > + * ib_device_get_by_index() returns valid device pointer.
> >   */
> >  struct ib_device *ib_device_get_by_index(u32 index)
> >  {
> > @@ -153,13 +154,23 @@ struct ib_device *ib_device_get_by_index(u32 index)
> >
> >  	down_read(&lists_rwsem);
> >  	device = __ib_device_get_by_index(index);
> > -	if (device)
> > -		get_device(&device->dev);
> > -
> > +	if (device) {
> > +		/* If unregistration has started, than better not
>
> This should probably be "then" (maybe just remove the word).
>
> > +		 * return a device.
> > +		 */
> > +		if (!refcount_inc_not_zero(&device->refcount))
> > +			device = NULL;
> > +	}
> >  	up_read(&lists_rwsem);
> >  	return device;
> >  }
> >
> > +void ib_device_put(struct ib_device *device)
> > +{
> > +	if (refcount_dec_and_test(&device->refcount))
> > +		complete(&device->unreg_completion);
> > +}
> > +
> >  static struct ib_device *__ib_device_get_by_name(const char *name)
> >  {
> >  	struct ib_device *device;
> > @@ -307,6 +318,8 @@ struct ib_device *ib_alloc_device(size_t size)
> >  	rwlock_init(&device->client_data_lock);
> >  	INIT_LIST_HEAD(&device->client_data_list);
> >  	rdma_restrack_init(&device->res);
> > +	refcount_set(&device->refcount, 1);
> > +	init_completion(&device->unreg_completion);
> >
> >  	return device;
> >  }
> > @@ -655,6 +668,12 @@ void ib_unregister_device(struct ib_device *device)
> >  	struct ib_client_data *context, *tmp;
> >  	unsigned long flags;
> >
> > +	/* Wait for all netlink command callers to finish working on the
> > +	 * device.
> > +	 */
> > +	ib_device_put(device);
> > +	wait_for_completion(&device->unreg_completion);
> > +
> >  	mutex_lock(&device_mutex);
> >
> >  	down_write(&lists_rwsem);
> > diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> > index 573399e3ccc1..63cc74483188 100644
> > --- a/drivers/infiniband/core/nldev.c
> > +++ b/drivers/infiniband/core/nldev.c
> > @@ -636,13 +636,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> >
> >  	nlmsg_end(msg, nlh);
> >
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
> >
> >  err_free:
> >  	nlmsg_free(msg);
> >  err:
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return err;
> >  }
> >
> > @@ -672,7 +672,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> >  		err = ib_device_rename(device, name);
> >  	}
> >
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return err;
> >  }
> >
> > @@ -756,14 +756,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> >  		goto err_free;
> >
> >  	nlmsg_end(msg, nlh);
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >
> >  	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
> >
> >  err_free:
> >  	nlmsg_free(msg);
> >  err:
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return err;
> >  }
> >
> > @@ -820,7 +820,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
> >  	}
> >
> >  out:
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	cb->args[0] = idx;
> >  	return skb->len;
> >  }
> > @@ -859,13 +859,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> >  		goto err_free;
> >
> >  	nlmsg_end(msg, nlh);
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
> >
> >  err_free:
> >  	nlmsg_free(msg);
> >  err:
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return ret;
> >  }
> >
> > @@ -1058,7 +1058,7 @@ next:		idx++;
> >  	if (!filled)
> >  		goto err;
> >
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return skb->len;
> >
> >  res_err:
> > @@ -1069,7 +1069,7 @@ next:		idx++;
> >  	nlmsg_cancel(skb, nlh);
> >
> >  err_index:
> > -	put_device(&device->dev);
> > +	ib_device_put(device);
> >  	return ret;
> >  }
> >
> > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> > index 3d727dcc007d..ccb279cc5d98 100644
> > --- a/include/rdma/ib_verbs.h
> > +++ b/include/rdma/ib_verbs.h
> > @@ -53,7 +53,7 @@
> >  #include <linux/string.h>
> >  #include <linux/slab.h>
> >  #include <linux/netdevice.h>
> > -
> > +#include <linux/refcount.h>
> >  #include <linux/if_link.h>
> >  #include <linux/atomic.h>
> >  #include <linux/mmu_notifier.h>
> > @@ -2612,6 +2612,12 @@ struct ib_device {
> >
> >  	const struct uapi_definition   *driver_def;
> >  	enum rdma_driver_id		driver_id;
> > +	/*
> > +	 * Provides synchronization between device unregistration and netlink
> > +	 * commands on a device. To be used only by core.
> > +	 */
> > +	refcount_t			refcount;
> > +	struct completion		unreg_completion;
> >  };
> >
> >  struct ib_client {
> >
>

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux