Patch "RDMA/device: Fix a race between mad_client and cm_client init" has been added to the 6.6-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    RDMA/device: Fix a race between mad_client and cm_client init

to the 6.6-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     rdma-device-fix-a-race-between-mad_client-and-cm_cli.patch
and it can be found in the queue-6.6 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit 38fc96ae2c4e781d6c53cc08fda899dfecaa9785
Author: Shifeng Li <lishifeng@xxxxxxxxxxxxxx>
Date:   Fri Feb 2 19:53:13 2024 -0800

    RDMA/device: Fix a race between mad_client and cm_client init
    
    [ Upstream commit 7a8bccd8b29c321ac181369b42b04fecf05f98e2 ]
    
    The mad_client is initialized in enable_device_and_get() after the
    devices_rwsem has been downgraded to a read lock. This leaves a window
    in which cm_client initialization fails: it cannot find a matching mad
    port in ib_mad_port_list, because the matching mad port is only added
    to the list afterwards.
    
        mad_client    |                       cm_client
    ------------------|--------------------------------------------------------
    ib_register_device|
    enable_device_and_get
    down_write(&devices_rwsem)
    xa_set_mark(&devices, DEVICE_REGISTERED)
    downgrade_write(&devices_rwsem)
                      |
                      |ib_cm_init
                      |ib_register_client(&cm_client)
                      |down_read(&devices_rwsem)
                      |xa_for_each_marked (&devices, DEVICE_REGISTERED)
                      |add_client_context
                      |cm_add_one
                      |ib_register_mad_agent
                      |ib_get_mad_port
                      |__ib_get_mad_port
                      |list_for_each_entry(entry, &ib_mad_port_list, port_list)
                      |return NULL
                      |up_read(&devices_rwsem)
                      |
    add_client_context|
    ib_mad_init_device|
    ib_mad_port_open  |
    list_add_tail(&port_priv->port_list, &ib_mad_port_list)
    up_read(&devices_rwsem)
                      |
    
    Fix it by using down_write(&devices_rwsem) in ib_register_client().
    
    Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration")
    Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@xxxxxxxxxxxxxx
    Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
    Signed-off-by: Shifeng Li <lishifeng@xxxxxxxxxxxxxx>
    Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 010718738d04c..db0a58c82838d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client)
 {
 	int ret;
 
-	down_write(&clients_rwsem);
+	lockdep_assert_held(&clients_rwsem);
 	/*
 	 * The add/remove callbacks must be called in FIFO/LIFO order. To
 	 * achieve this we assign client_ids so they are sorted in
@@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client)
 	client->client_id = highest_client_id;
 	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
 	if (ret)
-		goto out;
+		return ret;
 
 	highest_client_id++;
 	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
-
-out:
-	up_write(&clients_rwsem);
-	return ret;
+	return 0;
 }
 
 static void remove_client_id(struct ib_client *client)
@@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client)
 {
 	struct ib_device *device;
 	unsigned long index;
+	bool need_unreg = false;
 	int ret;
 
 	refcount_set(&client->uses, 1);
 	init_completion(&client->uses_zero);
+
+	/*
+	 * The devices_rwsem is held in write mode to ensure that a racing
+	 * ib_register_device() sees a consistent view of clients and devices.
+	 */
+	down_write(&devices_rwsem);
+	down_write(&clients_rwsem);
 	ret = assign_client_id(client);
 	if (ret)
-		return ret;
+		goto out;
 
-	down_read(&devices_rwsem);
+	need_unreg = true;
 	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
 		ret = add_client_context(device, client);
-		if (ret) {
-			up_read(&devices_rwsem);
-			ib_unregister_client(client);
-			return ret;
-		}
+		if (ret)
+			goto out;
 	}
-	up_read(&devices_rwsem);
-	return 0;
+	ret = 0;
+out:
+	up_write(&clients_rwsem);
+	up_write(&devices_rwsem);
+	if (need_unreg && ret)
+		ib_unregister_client(client);
+	return ret;
 }
 EXPORT_SYMBOL(ib_register_client);
 




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux