[PATCH v4 for-next 02/14] IB/core: Replace device_mutex with rwsem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



device_mutex is replaced rwsem in order to allow
clients' work to run concurrently with register_device,
unregister_device, register_client and unregister_client.

Clients might want to iterate the devices list in work
contexts. Saying that, a client must finish its operations
on a device when client->remove(device) is called. Finishing
the client's operation requires waiting for all works it
created regarding this device. However, a work could
depend on the device_mutex to iterate the devices list
and we would have got a deadlock. Replacing this lock
with rwlock solves this problem.

rwlock isn't enough in this scenario. This is because running
register_client and register_device could result in running
client->add(device) twice (if we protect the list with down_read).
A similar problem arises for unregister_client and unregister_device
concurrently. In order to solve this, we mutually exclude those
operations with a mutex.

This follows Jason Gunthorpe's suggestion in:
http://www.spinics.net/lists/linux-rdma/msg25174.html

Signed-off-by: Matan Barak <matanb@xxxxxxxxxxxx>
---
 drivers/infiniband/core/device.c | 55 ++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 18c1ece..bf19358 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -59,13 +59,27 @@ static LIST_HEAD(device_list);
 static LIST_HEAD(client_list);
 
 /*
- * device_mutex protects access to both device_list and client_list.
- * There's no real point to using multiple locks or something fancier
- * like an rwsem: we always access both lists, and we're always
- * modifying one list or the other list.  In any case this is not a
- * hot path so there's no point in trying to optimize.
+ * modify_mutex protects adding/removing devices and clients.
+ * Adding a client and a device simultaneously might cause calling
+ * client->add twice for the same device. Removing a client could
+ * cause calling client->remove twice. modify_mutex synchronizes
+ * those actions.
  */
-static DEFINE_MUTEX(device_mutex);
+static DEFINE_MUTEX(modify_mutex);
+
+/*
+ * lists_rwsem protects the client and device lists from
+ * read-while-write scenario. Adding/removing a device/client
+ * is protected by modify_mutex. However, other contexts of
+ * clients might want to iterate the device list. Client should
+ * cease all actions after client->remove returns. For that purpose,
+ * clients need to wait for their contexts which might need to
+ * iterate that list. Summing all that up, we have to use RCU/rwsem
+ * as either the iteration over client->remove could deadlock with
+ * the clients' works. We don't expect adding/removing of devices/clients
+ * to be done frequently, thus rwsem is favored here.
+ */
+static DECLARE_RWSEM(lists_rwsem);
 
 static int ib_device_check_mandatory(struct ib_device *device)
 {
@@ -276,7 +290,7 @@ int ib_register_device(struct ib_device *device,
 {
 	int ret;
 
-	mutex_lock(&device_mutex);
+	mutex_lock(&modify_mutex);
 
 	if (strchr(device->name, '%')) {
 		ret = alloc_name(device->name);
@@ -310,20 +324,24 @@ int ib_register_device(struct ib_device *device,
 		goto out;
 	}
 
+	down_write(&lists_rwsem);
 	list_add_tail(&device->core_list, &device_list);
+	up_write(&lists_rwsem);
+
 
 	device->reg_state = IB_DEV_REGISTERED;
 
 	{
 		struct ib_client *client;
 
+		/* down_read(&lists_rwsem) is implied by modify_mutex */
 		list_for_each_entry(client, &client_list, list)
 			if (client->add && !add_client_context(device, client))
 				client->add(device);
 	}
 
  out:
-	mutex_unlock(&device_mutex);
+	mutex_unlock(&modify_mutex);
 	return ret;
 }
 EXPORT_SYMBOL(ib_register_device);
@@ -340,18 +358,21 @@ void ib_unregister_device(struct ib_device *device)
 	struct ib_client_data *context, *tmp;
 	unsigned long flags;
 
-	mutex_lock(&device_mutex);
+	mutex_lock(&modify_mutex);
 
+	/* down_read(&lists_rwsem) is implied by modify_mutex */
 	list_for_each_entry_reverse(client, &client_list, list)
 		if (client->remove)
 			client->remove(device);
 
+	down_write(&lists_rwsem);
 	list_del(&device->core_list);
+	up_write(&lists_rwsem);
 
 	kfree(device->gid_tbl_len);
 	kfree(device->pkey_tbl_len);
 
-	mutex_unlock(&device_mutex);
+	mutex_unlock(&modify_mutex);
 
 	ib_device_unregister_sysfs(device);
 
@@ -381,14 +402,17 @@ int ib_register_client(struct ib_client *client)
 {
 	struct ib_device *device;
 
-	mutex_lock(&device_mutex);
+	mutex_lock(&modify_mutex);
 
+	down_write(&lists_rwsem);
 	list_add_tail(&client->list, &client_list);
+	up_write(&lists_rwsem);
+	/* down_read(&lists_rwsem) is implied by modify_mutex */
 	list_for_each_entry(device, &device_list, core_list)
 		if (client->add && !add_client_context(device, client))
 			client->add(device);
 
-	mutex_unlock(&device_mutex);
+	mutex_unlock(&modify_mutex);
 
 	return 0;
 }
@@ -408,8 +432,9 @@ void ib_unregister_client(struct ib_client *client)
 	struct ib_device *device;
 	unsigned long flags;
 
-	mutex_lock(&device_mutex);
+	mutex_lock(&modify_mutex);
 
+	/* down_read(&lists_rwsem) is implied by modify_mutex */
 	list_for_each_entry(device, &device_list, core_list) {
 		if (client->remove)
 			client->remove(device);
@@ -422,9 +447,11 @@ void ib_unregister_client(struct ib_client *client)
 			}
 		spin_unlock_irqrestore(&device->client_data_lock, flags);
 	}
+	down_write(&lists_rwsem);
 	list_del(&client->list);
+	up_write(&lists_rwsem);
 
-	mutex_unlock(&device_mutex);
+	mutex_unlock(&modify_mutex);
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux