On Thu, Nov 15, 2018 at 01:43:34PM +0200, Gal Pressman wrote: > On 15-Nov-18 04:05, Leon Romanovsky wrote: > > From: Parav Pandit <parav@xxxxxxxxxxxx> > > > > Currently when rdma device is getting removed, get resource info can > > race with device removal example below. > > > > CPU-0 CPU-1 > > -------- -------- > > rdma_nl_rcv_msg() > > nldev_res_get_cq_dumpit() > > mutex_lock(device_lock); > > get device reference > > mutex_unlock(device_lock); [..] > > ib_unregister_device() > > /* Valid reference to > > * device->dev exists. > > */ > > ib_dealloc_device() > > > > [..] > > provider->fill_res_entry(); > > > > Even though device object is not freed, fill_res_entry() can get called > > on device which doesn't from provider driver side. > > Kernel core device reference count is not sufficient. > > > > Similar race can occur with device renaming and device removal, where > > device_rename() tries to rename a unregistered device. While this is fine > > for devices of a class which are not net namespace aware, but it is > > incorrect for net namespace aware class coming in subsequent series. > > If a class is net namespace aware, than below [1] call trace is observed > > in above situation. > > > > Therefore, to avoid the such race, keep a reference count and let device > > unregistration wait until all netlink users drop the reference. > > > > [1] Call trace: > > kernfs: ns required in 'infiniband' for 'mlx5_0' > > WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120 > > libahci i2c_core mlxfw libata dca [last unloaded: devlink] > > RIP: 0010:kernfs_find_ns+0x104/0x120 > > Call Trace: > > kernfs_find_and_get_ns+0x2e/0x50 > > sysfs_rename_link_ns+0x40/0xb0 > > device_rename+0xb2/0xf0 > > ib_device_rename+0xb3/0x100 [ib_core] > > nldev_set_doit+0x165/0x190 [ib_core] > > rdma_nl_rcv_msg+0x249/0x250 [ib_core] > > ? netlink_deliver_tap+0x8f/0x3e0 > > rdma_nl_rcv+0xd6/0x120 [ib_core] > > netlink_unicast+0x17c/0x230 > > netlink_sendmsg+0x2f0/0x3e0 > > sock_sendmsg+0x30/0x40 > > __sys_sendto+0xdc/0x160 > > > > Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking") > > Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx> > > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> > > --- > > drivers/infiniband/core/core_priv.h | 1 + > > drivers/infiniband/core/device.c | 27 +++++++++++++++++++++++---- > > drivers/infiniband/core/nldev.c | 20 ++++++++++---------- > > include/rdma/ib_verbs.h | 8 +++++++- > > 4 files changed, 41 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h > > index 0b0ba44e773d..0da0a26c02b9 100644 > > --- a/drivers/infiniband/core/core_priv.h > > +++ b/drivers/infiniband/core/core_priv.h > > @@ -269,6 +269,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, > > #endif > > > > struct ib_device *ib_device_get_by_index(u32 ifindex); > > +void ib_device_put(struct ib_device *device); > > /* RDMA device netlink */ > > void nldev_init(void); > > void nldev_exit(void); > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > > index 1a783724fcba..842853b16941 100644 > > --- a/drivers/infiniband/core/device.c > > +++ b/drivers/infiniband/core/device.c > > @@ -145,7 +145,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index) > > } > > > > /* > > - * Caller is responsible to return refrerence count by calling put_device() > > + * Caller must perform ib_device_put to return device refrerence count when > > Typo in reference. Thanks for the feedback, sending v1. > > > + * ib_device_get_by_index() returns valid device pointer. > > */ > > struct ib_device *ib_device_get_by_index(u32 index) > > { > > @@ -153,13 +154,23 @@ struct ib_device *ib_device_get_by_index(u32 index) > > > > down_read(&lists_rwsem); > > device = __ib_device_get_by_index(index); > > - if (device) > > - get_device(&device->dev); > > - > > + if (device) { > > + /* If unregistration has started, than better not > > This should probably be "then" (maybe just remove the word). > > > + * return a device. > > + */ > > + if (!refcount_inc_not_zero(&device->refcount)) > > + device = NULL; > > + } > > up_read(&lists_rwsem); > > return device; > > } > > > > +void ib_device_put(struct ib_device *device) > > +{ > > + if (refcount_dec_and_test(&device->refcount)) > > + complete(&device->unreg_completion); > > +} > > + > > static struct ib_device *__ib_device_get_by_name(const char *name) > > { > > struct ib_device *device; > > @@ -307,6 +318,8 @@ struct ib_device *ib_alloc_device(size_t size) > > rwlock_init(&device->client_data_lock); > > INIT_LIST_HEAD(&device->client_data_list); > > rdma_restrack_init(&device->res); > > + refcount_set(&device->refcount, 1); > > + init_completion(&device->unreg_completion); > > > > return device; > > } > > @@ -655,6 +668,12 @@ void ib_unregister_device(struct ib_device *device) > > struct ib_client_data *context, *tmp; > > unsigned long flags; > > > > + /* Wait for all netlink command callers to finish working on the > > + * device. > > + */ > > + ib_device_put(device); > > + wait_for_completion(&device->unreg_completion); > > + > > mutex_lock(&device_mutex); > > > > down_write(&lists_rwsem); > > diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c > > index 573399e3ccc1..63cc74483188 100644 > > --- a/drivers/infiniband/core/nldev.c > > +++ b/drivers/infiniband/core/nldev.c > > @@ -636,13 +636,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, > > > > nlmsg_end(msg, nlh); > > > > - put_device(&device->dev); > > + ib_device_put(device); > > return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); > > > > err_free: > > nlmsg_free(msg); > > err: > > - put_device(&device->dev); > > + ib_device_put(device); > > return err; > > } > > > > @@ -672,7 +672,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, > > err = ib_device_rename(device, name); > > } > > > > - put_device(&device->dev); > > + ib_device_put(device); > > return err; > > } > > > > @@ -756,14 +756,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, > > goto err_free; > > > > nlmsg_end(msg, nlh); > > - put_device(&device->dev); > > + ib_device_put(device); > > > > return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); > > > > err_free: > > nlmsg_free(msg); > > err: > > - put_device(&device->dev); > > + ib_device_put(device); > > return err; > > } > > > > @@ -820,7 +820,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, > > } > > > > out: > > - put_device(&device->dev); > > + ib_device_put(device); > > cb->args[0] = idx; > > return skb->len; > > } > > @@ -859,13 +859,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, > > goto err_free; > > > > nlmsg_end(msg, nlh); > > - put_device(&device->dev); > > + ib_device_put(device); > > return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); > > > > err_free: > > nlmsg_free(msg); > > err: > > - put_device(&device->dev); > > + ib_device_put(device); > > return ret; > > } > > > > @@ -1058,7 +1058,7 @@ next: idx++; > > if (!filled) > > goto err; > > > > - put_device(&device->dev); > > + ib_device_put(device); > > return skb->len; > > > > res_err: > > @@ -1069,7 +1069,7 @@ next: idx++; > > nlmsg_cancel(skb, nlh); > > > > err_index: > > - put_device(&device->dev); > > + ib_device_put(device); > > return ret; > > } > > > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > > index 3d727dcc007d..ccb279cc5d98 100644 > > --- a/include/rdma/ib_verbs.h > > +++ b/include/rdma/ib_verbs.h > > @@ -53,7 +53,7 @@ > > #include <linux/string.h> > > #include <linux/slab.h> > > #include <linux/netdevice.h> > > - > > +#include <linux/refcount.h> > > #include <linux/if_link.h> > > #include <linux/atomic.h> > > #include <linux/mmu_notifier.h> > > @@ -2612,6 +2612,12 @@ struct ib_device { > > > > const struct uapi_definition *driver_def; > > enum rdma_driver_id driver_id; > > + /* > > + * Provides synchronization between device unregistration and netlink > > + * commands on a device. To be used only by core. > > + */ > > + refcount_t refcount; > > + struct completion unreg_completion; > > }; > > > > struct ib_client { > > >
Attachment:
signature.asc
Description: PGP signature