Instead of using a complex scheme where we drop and re-acquire rwlock, switch to a much simpler dual locking pattern where the sleepable lock and the spinlock are held by all writers, while all readers only grab the spinlock. GID_TABLE_ENTRY_INVALID is used as the fence protected by that lock. Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx> --- drivers/infiniband/core/cache.c | 73 ++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 49 deletions(-) This is based off Parav's latest series. What do you think Parav? Can you give it some test if you like it? I only added two new mutex(table->locks)'s: - cleanup_gid_table_port(). This is only called during add/remove one device which is a sleepable context - ib_cache_update() This is only called from a work queue, so it should be fine Couldn't find any reason not to structure the locking like this. The comments alluded to some difference with IB, but I'm thinking that is gone now, if it ever even existed? I like this because the locking is now easier to audit and much saner. We never have to drop a lock to call the driver, everything is simply fully serialized. All writers have to hold the mutex, very simple. I think this will make the next series easier to review the locking in. diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d6fc244301c024..29274b09e93657 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -80,25 +80,12 @@ struct ib_gid_table_entry { struct ib_gid_table { int sz; - /* In RoCE, adding a GID to the table requires: * (a) Find if this GID is already exists. * (b) Find a free space. * (c) Write the new GID * * Delete requires different set of operations: * (a) Find the GID * (b) Delete it. * * Add/delete should be carried out atomically. * This is done by locking this mutex from multiple * writers. We don't need this lock for IB, as the MAD * layer replaces all entries. 
All data_vec entries - * are locked by this lock. - **/ - struct mutex lock; - /* This lock protects the table entries from being - * read and written simultaneously. + /* Any writer to data_vec must hold this lock and the write side of + * rwlock. readers must hold only rwlock. All writers must be in a + * sleepable context. */ + struct mutex lock; + /* rwlock protects data_vec[ix]->props. */ rwlock_t rwlock; struct ib_gid_table_entry *data_vec; }; @@ -154,24 +141,20 @@ EXPORT_SYMBOL(ib_cache_gid_parse_type_str); static void del_roce_gid(struct ib_device *device, u8 port_num, struct ib_gid_table *table, int ix) - __releases(&table->rwlock) __acquires(&table->rwlock) { pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, device->name, port_num, ix, table->data_vec[ix].gid.raw); - write_unlock_irq(&table->rwlock); if (rdma_cap_roce_gid_table(device, port_num)) device->del_gid(&table->data_vec[ix].attr, &table->data_vec[ix].context); dev_put(table->data_vec[ix].attr.ndev); - write_lock_irq(&table->rwlock); } static int add_roce_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *attr) - __releases(&table->rwlock) __acquires(&table->rwlock) { struct ib_gid_table_entry *entry; int ix = attr->index; @@ -194,7 +177,6 @@ static int add_roce_gid(struct ib_gid_table *table, return -EINVAL; } - write_unlock_irq(&table->rwlock); if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { ret = attr->device->add_gid(gid, attr, &entry->context); if (ret) { @@ -207,7 +189,6 @@ static int add_roce_gid(struct ib_gid_table *table, dev_hold(attr->ndev); add_err: - write_lock_irq(&table->rwlock); if (!ret) pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, attr->device->name, attr->port_num, ix, gid->raw); @@ -221,9 +202,6 @@ static int add_roce_gid(struct ib_gid_table *table, * @gid: GID content * @attr: Attributes of the GID * - * add_modify_gid() function expect that rwlock will be - * write locked in all scenarios and that 
lock will be locked in - * sleep-able (RoCE) scenarios. */ static int add_modify_gid(struct ib_gid_table *table, const union ib_gid *gid, @@ -233,6 +211,8 @@ static int add_modify_gid(struct ib_gid_table *table, if (rdma_protocol_roce(attr->device, attr->port_num)) { ret = add_roce_gid(table, gid, attr); + if (!ret) + return ret; } else { /* * Some HCA's report multiple GID entries with only one @@ -244,14 +224,15 @@ static int add_modify_gid(struct ib_gid_table *table, return 0; } - if (!ret) { - memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); - memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); - table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; - } + memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); + memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); - lockdep_assert_held(&table->rwlock); - return ret; + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); + table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; + write_unlock_irq(&table->rwlock); + + return 0; } /** @@ -262,23 +243,23 @@ static int add_modify_gid(struct ib_gid_table *table, * @table: GID table of the IB device for a port * @ix: GID entry index to delete * - * del_gid() function expect that rwlock will be - * write locked in all scenarios and that lock will be locked in - * sleep-able (RoCE) scenarios. 
*/ static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + lockdep_assert_held(&table->lock); + write_lock_irq(&table->rwlock); table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; + write_unlock_irq(&table->rwlock); + if (rdma_protocol_roce(ib_dev, port)) del_roce_gid(ib_dev, port, table, ix); memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid)); memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); table->data_vec[ix].context = NULL; - lockdep_assert_held(&table->rwlock); } -/* rwlock should be read locked */ +/* rwlock should be read locked, or lock should be held */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, unsigned long mask, int *pempty) @@ -374,7 +355,6 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); ix = find_gid(table, gid, attr, default_gid, mask, &empty); if (ix >= 0) @@ -392,7 +372,6 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, dispatch_gid_change_event(ib_dev, port); out_unlock: - write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); if (ret) pr_warn("%s: unable to add gid %pI6 error=%d\n", @@ -441,7 +420,6 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | @@ -457,7 +435,6 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, dispatch_gid_change_event(ib_dev, port); out_unlock: - write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); if (ret) pr_debug("%s: can't delete gid %pI6 error=%d\n", @@ -475,7 +452,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, table = ib_dev->cache.ports[port - 
rdma_start_port(ib_dev)].gid; mutex_lock(&table->lock); - write_lock_irq(&table->rwlock); for (ix = 0; ix < table->sz; ix++) { if (table->data_vec[ix].attr.ndev == ndev) { @@ -484,7 +460,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, } } - write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); if (deleted) @@ -724,7 +699,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, if (!table) return; - write_lock_irq(&table->rwlock); + mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { if (memcmp(&table->data_vec[i].gid, &zgid, sizeof(table->data_vec[i].gid))) { @@ -732,7 +707,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, deleted = true; } } - write_unlock_irq(&table->rwlock); + mutex_unlock(&table->lock); if (deleted) dispatch_gid_change_event(ib_dev, port); @@ -1172,12 +1147,12 @@ static void ib_cache_update(struct ib_device *device, if (!use_roce_gid_table) { gid_attr.device = device; gid_attr.port_num = port; - write_lock(&table->rwlock); + mutex_lock(&table->lock); for (i = 0; i < gid_cache->table_len; i++) { gid_attr.index = i; add_modify_gid(table, gid_cache->table + i, &gid_attr); } - write_unlock(&table->rwlock); + mutex_unlock(&table->lock); } device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc; -- 2.16.2 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html