[PATCH rdma-next v1] RDMA/core: Sync unregistration with netlink commands

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Parav Pandit <parav@xxxxxxxxxxxx>

Currently when rdma device is getting removed, get resource info can
race with device removal example below.

      CPU-0                                  CPU-1
    --------                               --------
    rdma_nl_rcv_msg()
       nldev_res_get_cq_dumpit()
          mutex_lock(device_lock);
          get device reference
          mutex_unlock(device_lock);        [..]
                                            ib_unregister_device()
                                            /* Valid reference to
                                             * device->dev exists.
                                             */
                                             ib_dealloc_device()

          [..]
          provider->fill_res_entry();

Even though device object is not freed, fill_res_entry() can get called
on device which doesn't from provider driver side.
Kernel core device reference count is not sufficient.

Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series.
If a class is net namespace aware, than below [1] call trace is observed
in above situation.

Therefore, to avoid the such race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.

[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160

Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
---
Changelog v0->v1:
 * Fixed type in comment
 * Rephrased comment
---
 drivers/infiniband/core/core_priv.h |  1 +
 drivers/infiniband/core/device.c    | 27 +++++++++++++++++++++++----
 drivers/infiniband/core/nldev.c     | 20 ++++++++++----------
 include/rdma/ib_verbs.h             |  8 +++++++-
 4 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 0b0ba44e773d..0da0a26c02b9 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -269,6 +269,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 #endif

 struct ib_device *ib_device_get_by_index(u32 ifindex);
+void ib_device_put(struct ib_device *device);
 /* RDMA device netlink */
 void nldev_init(void);
 void nldev_exit(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 1a783724fcba..1dc8a5726d33 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -145,7 +145,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
 }

 /*
- * Caller is responsible to return refrerence count by calling put_device()
+ * Caller must perform ib_device_put to return device reference count when
+ * ib_device_get_by_index() returns valid device pointer.
  */
 struct ib_device *ib_device_get_by_index(u32 index)
 {
@@ -153,13 +154,23 @@ struct ib_device *ib_device_get_by_index(u32 index)

 	down_read(&lists_rwsem);
 	device = __ib_device_get_by_index(index);
-	if (device)
-		get_device(&device->dev);
-
+	if (device) {
+		/* It is better do not return a device,
+		 * if unregistration has started.
+		 */
+		if (!refcount_inc_not_zero(&device->refcount))
+			device = NULL;
+	}
 	up_read(&lists_rwsem);
 	return device;
 }

+void ib_device_put(struct ib_device *device)
+{
+	if (refcount_dec_and_test(&device->refcount))
+		complete(&device->unreg_completion);
+}
+
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
 	struct ib_device *device;
@@ -307,6 +318,8 @@ struct ib_device *ib_alloc_device(size_t size)
 	rwlock_init(&device->client_data_lock);
 	INIT_LIST_HEAD(&device->client_data_list);
 	rdma_restrack_init(&device->res);
+	refcount_set(&device->refcount, 1);
+	init_completion(&device->unreg_completion);

 	return device;
 }
@@ -655,6 +668,12 @@ void ib_unregister_device(struct ib_device *device)
 	struct ib_client_data *context, *tmp;
 	unsigned long flags;

+	/* Wait for all netlink command callers to finish working on the
+	 * device.
+	 */
+	ib_device_put(device);
+	wait_for_completion(&device->unreg_completion);
+
 	mutex_lock(&device_mutex);

 	down_write(&lists_rwsem);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 573399e3ccc1..63cc74483188 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -636,13 +636,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,

 	nlmsg_end(msg, nlh);

-	put_device(&device->dev);
+	ib_device_put(device);
 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

 err_free:
 	nlmsg_free(msg);
 err:
-	put_device(&device->dev);
+	ib_device_put(device);
 	return err;
 }

@@ -672,7 +672,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		err = ib_device_rename(device, name);
 	}

-	put_device(&device->dev);
+	ib_device_put(device);
 	return err;
 }

@@ -756,14 +756,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto err_free;

 	nlmsg_end(msg, nlh);
-	put_device(&device->dev);
+	ib_device_put(device);

 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

 err_free:
 	nlmsg_free(msg);
 err:
-	put_device(&device->dev);
+	ib_device_put(device);
 	return err;
 }

@@ -820,7 +820,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
 	}

 out:
-	put_device(&device->dev);
+	ib_device_put(device);
 	cb->args[0] = idx;
 	return skb->len;
 }
@@ -859,13 +859,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto err_free;

 	nlmsg_end(msg, nlh);
-	put_device(&device->dev);
+	ib_device_put(device);
 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

 err_free:
 	nlmsg_free(msg);
 err:
-	put_device(&device->dev);
+	ib_device_put(device);
 	return ret;
 }

@@ -1058,7 +1058,7 @@ next:		idx++;
 	if (!filled)
 		goto err;

-	put_device(&device->dev);
+	ib_device_put(device);
 	return skb->len;

 res_err:
@@ -1069,7 +1069,7 @@ next:		idx++;
 	nlmsg_cancel(skb, nlh);

 err_index:
-	put_device(&device->dev);
+	ib_device_put(device);
 	return ret;
 }

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3d727dcc007d..ccb279cc5d98 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -53,7 +53,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
-
+#include <linux/refcount.h>
 #include <linux/if_link.h>
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
@@ -2612,6 +2612,12 @@ struct ib_device {

 	const struct uapi_definition   *driver_def;
 	enum rdma_driver_id		driver_id;
+	/*
+	 * Provides synchronization between device unregistration and netlink
+	 * commands on a device. To be used only by core.
+	 */
+	refcount_t			refcount;
+	struct completion		unreg_completion;
 };

 struct ib_client {
--
2.19.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux