On 2020-09-08 11:22, Jason Gunthorpe wrote:
> It is reasonable to consider the cq_pool as a built-in client, so I
> would suggest moving it to right around the time the dynamic clients
> are handled. Something like this:
>
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index c36b4d2b61e0c0..e3651dacad1da6 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -1285,6 +1285,8 @@ static void disable_device(struct ib_device *device)
>  		remove_client_context(device, cid);
>  	}
> 
> +	ib_cq_pool_destroy(ib_dev);
> +
>  	/* Pairs with refcount_set in enable_device */
>  	ib_device_put(device);
>  	wait_for_completion(&device->unreg_completion);
> @@ -1328,6 +1330,8 @@ static int enable_device_and_get(struct ib_device *device)
>  			goto out;
>  	}
> 
> +	ib_cq_pool_init(device);
> +
>  	down_read(&clients_rwsem);
>  	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
>  		ret = add_client_context(device, client);
> @@ -1400,7 +1404,6 @@ int ib_register_device(struct ib_device *device, const char *name)
>  		goto dev_cleanup;
>  	}
> 
> -	ib_cq_pool_init(device);
>  	ret = enable_device_and_get(device);
>  	dev_set_uevent_suppress(&device->dev, false);
>  	/* Mark for userspace that device is ready */
> @@ -1455,7 +1458,6 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
>  		goto out;
> 
>  	disable_device(ib_dev);
> -	ib_cq_pool_destroy(ib_dev);
> 
>  	/* Expedite removing unregistered pointers from the hash table */
>  	free_netdevs(ib_dev);

The above patch didn't compile (disable_device() receives the device
pointer as 'device', not 'ib_dev'), but the patch below does compile and
makes the hang disappear. So feel free to add the following tag to the
patch below:

Tested-by: Bart Van Assche <bvanassche@xxxxxxx>

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index c36b4d2b61e0..23ee65a9185f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1285,6 +1285,8 @@ static void disable_device(struct ib_device *device)
 		remove_client_context(device, cid);
 	}
 
+	ib_cq_pool_destroy(device);
+
 	/* Pairs with refcount_set in enable_device */
 	ib_device_put(device);
 	wait_for_completion(&device->unreg_completion);
@@ -1328,6 +1330,8 @@ static int enable_device_and_get(struct ib_device *device)
 			goto out;
 	}
 
+	ib_cq_pool_init(device);
+
 	down_read(&clients_rwsem);
 	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
 		ret = add_client_context(device, client);
@@ -1400,7 +1404,6 @@ int ib_register_device(struct ib_device *device, const char *name)
 		goto dev_cleanup;
 	}
 
-	ib_cq_pool_init(device);
 	ret = enable_device_and_get(device);
 	dev_set_uevent_suppress(&device->dev, false);
 	/* Mark for userspace that device is ready */
@@ -1455,7 +1458,6 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
 		goto out;
 
 	disable_device(ib_dev);
-	ib_cq_pool_destroy(ib_dev);
 
 	/* Expedite removing unregistered pointers from the hash table */
 	free_netdevs(ib_dev);