[RFC PATCH] IB/core: Simplify locking around data_vec

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Instead of using a complex scheme where we drop and re-acquire rwlock,
switch to a much simpler dual locking pattern where the sleepable lock
and the spinlock are held by all writers, while all readers only grab
the spinlock.

GID_TABLE_ENTRY_INVALID is used as the fence protected by that lock.

Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
---
 drivers/infiniband/core/cache.c | 73 ++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 49 deletions(-)

This is based off Parav's latest series.

What do you think Parav? Can you give it some test if you like it?

I only added two new mutex(table->locks)'s:
 - cleanup_gid_table_port(). This is only called during add/remove one
   device which is a sleepable context
 - ib_cache_update() This is only called from a work queue, so
   it should be fine

Couldn't find any reason not to structure the locking like this. The
comments alluded to some difference with IB, but I'm thinking that is
gone now, if it ever even existed?

I like this because the locking is now easier to audit and much
saner. We never have to drop a lock to call the driver, everything is
simply fully serialized. All writers have to hold the mutex, very
simple.

I think this will make the next series easier to review the locking
in.

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index d6fc244301c024..29274b09e93657 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -80,25 +80,12 @@ struct ib_gid_table_entry {
 
 struct ib_gid_table {
 	int                  sz;
-	/* In RoCE, adding a GID to the table requires:
-	 * (a) Find if this GID is already exists.
-	 * (b) Find a free space.
-	 * (c) Write the new GID
-	 *
-	 * Delete requires different set of operations:
-	 * (a) Find the GID
-	 * (b) Delete it.
-	 *
-	 * Add/delete should be carried out atomically.
-	 * This is done by locking this mutex from multiple
-	 * writers. We don't need this lock for IB, as the MAD
-	 * layer replaces all entries. All data_vec entries
-	 * are locked by this lock.
-	 **/
-	struct mutex         lock;
-	/* This lock protects the table entries from being
-	 * read and written simultaneously.
+	/* Any writer to data_vec must hold this lock and the write side of
+	 * rwlock. Readers must hold only rwlock. All writers must be in a
+	 * sleepable context.
 	 */
+	struct mutex         lock;
+	/* rwlock protects data_vec[ix].props. */
 	rwlock_t	     rwlock;
 	struct ib_gid_table_entry *data_vec;
 };
@@ -154,24 +141,20 @@ EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
 
 static void del_roce_gid(struct ib_device *device, u8 port_num,
 			 struct ib_gid_table *table, int ix)
-	__releases(&table->rwlock) __acquires(&table->rwlock)
 {
 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
 		 device->name, port_num, ix,
 		 table->data_vec[ix].gid.raw);
 
-	write_unlock_irq(&table->rwlock);
 	if (rdma_cap_roce_gid_table(device, port_num))
 		device->del_gid(&table->data_vec[ix].attr,
 				&table->data_vec[ix].context);
 	dev_put(table->data_vec[ix].attr.ndev);
-	write_lock_irq(&table->rwlock);
 }
 
 static int add_roce_gid(struct ib_gid_table *table,
 			const union ib_gid *gid,
 			const struct ib_gid_attr *attr)
-	__releases(&table->rwlock) __acquires(&table->rwlock)
 {
 	struct ib_gid_table_entry *entry;
 	int ix = attr->index;
@@ -194,7 +177,6 @@ static int add_roce_gid(struct ib_gid_table *table,
 		return -EINVAL;
 	}
 
-	write_unlock_irq(&table->rwlock);
 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
 		ret = attr->device->add_gid(gid, attr, &entry->context);
 		if (ret) {
@@ -207,7 +189,6 @@ static int add_roce_gid(struct ib_gid_table *table,
 	dev_hold(attr->ndev);
 
 add_err:
-	write_lock_irq(&table->rwlock);
 	if (!ret)
 		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
 			 attr->device->name, attr->port_num, ix, gid->raw);
@@ -221,9 +202,6 @@ static int add_roce_gid(struct ib_gid_table *table,
  * @gid:	GID content
  * @attr:	Attributes of the GID
  *
- * add_modify_gid() function expect that rwlock will be
- * write locked in all scenarios and that lock will be locked in
- * sleep-able (RoCE) scenarios.
  */
 static int add_modify_gid(struct ib_gid_table *table,
 			  const union ib_gid *gid,
@@ -233,6 +211,8 @@ static int add_modify_gid(struct ib_gid_table *table,
 
 	if (rdma_protocol_roce(attr->device, attr->port_num)) {
 		ret = add_roce_gid(table, gid, attr);
+		if (ret)
+			return ret;
 	} else {
 		/*
 		 * Some HCA's report multiple GID entries with only one
@@ -244,14 +224,15 @@ static int add_modify_gid(struct ib_gid_table *table,
 			return 0;
 	}
 
-	if (!ret) {
-		memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
-		memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-		table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
-	}
+	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
+	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
 
-	lockdep_assert_held(&table->rwlock);
-	return ret;
+	lockdep_assert_held(&table->lock);
+	write_lock_irq(&table->rwlock);
+	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
+	write_unlock_irq(&table->rwlock);
+
+	return 0;
 }
 
 /**
@@ -262,23 +243,23 @@ static int add_modify_gid(struct ib_gid_table *table,
  * @table:	GID table of the IB device for a port
  * @ix:		GID entry index to delete
  *
- * del_gid() function expect that rwlock will be
- * write locked in all scenarios and that lock will be locked in
- * sleep-able (RoCE) scenarios.
  */
 static void del_gid(struct ib_device *ib_dev, u8 port,
 		    struct ib_gid_table *table, int ix)
 {
+	lockdep_assert_held(&table->lock);
+	write_lock_irq(&table->rwlock);
 	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+	write_unlock_irq(&table->rwlock);
+
 	if (rdma_protocol_roce(ib_dev, port))
 		del_roce_gid(ib_dev, port, table, ix);
 	memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
 	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
 	table->data_vec[ix].context = NULL;
-	lockdep_assert_held(&table->rwlock);
 }
 
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 		    const struct ib_gid_attr *val, bool default_gid,
 		    unsigned long mask, int *pempty)
@@ -374,7 +355,6 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
 	mutex_lock(&table->lock);
-	write_lock_irq(&table->rwlock);
 
 	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
 	if (ix >= 0)
@@ -392,7 +372,6 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 		dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
-	write_unlock_irq(&table->rwlock);
 	mutex_unlock(&table->lock);
 	if (ret)
 		pr_warn("%s: unable to add gid %pI6 error=%d\n",
@@ -441,7 +420,6 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
 	mutex_lock(&table->lock);
-	write_lock_irq(&table->rwlock);
 
 	ix = find_gid(table, gid, attr, false,
 		      GID_ATTR_FIND_MASK_GID	  |
@@ -457,7 +435,6 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
 	dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
-	write_unlock_irq(&table->rwlock);
 	mutex_unlock(&table->lock);
 	if (ret)
 		pr_debug("%s: can't delete gid %pI6 error=%d\n",
@@ -475,7 +452,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 	table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
 	mutex_lock(&table->lock);
-	write_lock_irq(&table->rwlock);
 
 	for (ix = 0; ix < table->sz; ix++) {
 		if (table->data_vec[ix].attr.ndev == ndev) {
@@ -484,7 +460,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 		}
 	}
 
-	write_unlock_irq(&table->rwlock);
 	mutex_unlock(&table->lock);
 
 	if (deleted)
@@ -724,7 +699,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 	if (!table)
 		return;
 
-	write_lock_irq(&table->rwlock);
+	mutex_lock(&table->lock);
 	for (i = 0; i < table->sz; ++i) {
 		if (memcmp(&table->data_vec[i].gid, &zgid,
 			   sizeof(table->data_vec[i].gid))) {
@@ -732,7 +707,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 			deleted = true;
 		}
 	}
-	write_unlock_irq(&table->rwlock);
+	mutex_unlock(&table->lock);
 
 	if (deleted)
 		dispatch_gid_change_event(ib_dev, port);
@@ -1172,12 +1147,12 @@ static void ib_cache_update(struct ib_device *device,
 	if (!use_roce_gid_table) {
 		gid_attr.device = device;
 		gid_attr.port_num = port;
-		write_lock(&table->rwlock);
+		mutex_lock(&table->lock);
 		for (i = 0; i < gid_cache->table_len; i++) {
 			gid_attr.index = i;
 			add_modify_gid(table, gid_cache->table + i, &gid_attr);
 		}
-		write_unlock(&table->rwlock);
+		mutex_unlock(&table->lock);
 	}
 
 	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
-- 
2.16.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux