On Thu, May 31, 2018 at 11:09 AM, Sagi Grimberg <sagi@xxxxxxxxxxx> wrote: > > > On 05/15/2018 06:15 PM, Roman Pen wrote: >> >> ib_client API provides a way to wrap an ib_device with a specific ULP >> structure. Using that API local lists and mutexes can be completely >> avoided and allocation/removal paths become a bit cleaner. >> >> Signed-off-by: Roman Pen <roman.penyaev@xxxxxxxxxxxxxxx> >> Cc: Christoph Hellwig <hch@xxxxxx> >> Cc: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> >> Cc: Bart Van Assche <bart.vanassche@xxxxxxxxxxx> >> Cc: Sagi Grimberg <sagi@xxxxxxxxxxx> >> Cc: Doug Ledford <dledford@xxxxxxxxxx> >> --- >> drivers/nvme/host/rdma.c | 82 >> ++++++++++++++++++++++-------------------------- >> 1 file changed, 38 insertions(+), 44 deletions(-) >> >> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c >> index 1eb4438a8763..f003c8d77846 100644 >> --- a/drivers/nvme/host/rdma.c >> +++ b/drivers/nvme/host/rdma.c >> @@ -46,7 +46,6 @@ struct nvme_rdma_device { >> struct ib_device *dev; >> struct ib_pd *pd; >> struct kref ref; >> - struct list_head entry; >> }; >> struct nvme_rdma_qe { >> @@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl >> *to_rdma_ctrl(struct nvme_ctrl *ctrl) >> return container_of(ctrl, struct nvme_rdma_ctrl, ctrl); >> } >> -static LIST_HEAD(device_list); >> -static DEFINE_MUTEX(device_list_mutex); >> - >> +static struct ib_client nvme_rdma_ib_client; >> static LIST_HEAD(nvme_rdma_ctrl_list); >> static DEFINE_MUTEX(nvme_rdma_ctrl_mutex); >> @@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref) >> struct nvme_rdma_device *ndev = >> container_of(ref, struct nvme_rdma_device, ref); >> - mutex_lock(&device_list_mutex); >> - list_del(&ndev->entry); >> - mutex_unlock(&device_list_mutex); >> - >> + ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL); >> ib_dealloc_pd(ndev->pd); >> kfree(ndev); >> } >> -static void nvme_rdma_dev_put(struct nvme_rdma_device *dev) >> +static int nvme_rdma_dev_put(struct 
nvme_rdma_device *dev) >> { >> - kref_put(&dev->ref, nvme_rdma_free_dev); >> + return kref_put(&dev->ref, nvme_rdma_free_dev); >> } >> static int nvme_rdma_dev_get(struct nvme_rdma_device *dev) >> @@ -348,43 +342,36 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) >> { >> struct nvme_rdma_device *ndev; >> - mutex_lock(&device_list_mutex); >> - list_for_each_entry(ndev, &device_list, entry) { >> - if (ndev->dev->node_guid == cm_id->device->node_guid && >> - nvme_rdma_dev_get(ndev)) >> - goto out_unlock; >> - } >> + ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client); >> + if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev))) >> + ndev = NULL; >> + >> + return ndev; >> +} >> + >> +static struct nvme_rdma_device * >> +nvme_rdma_alloc_device(struct ib_device *device) >> +{ >> + struct nvme_rdma_device *ndev; >> ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); >> - if (!ndev) >> - goto out_err; >> + if (unlikely(!ndev)) >> + return NULL; >> - ndev->dev = cm_id->device; >> + ndev->dev = device; >> kref_init(&ndev->ref); >> ndev->pd = ib_alloc_pd(ndev->dev, >> register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); >> - if (IS_ERR(ndev->pd)) >> + if (unlikely(IS_ERR(ndev->pd))) >> goto out_free_dev; >> - if (!(ndev->dev->attrs.device_cap_flags & >> - IB_DEVICE_MEM_MGT_EXTENSIONS)) { >> - dev_err(&ndev->dev->dev, >> - "Memory registrations not supported.\n"); >> - goto out_free_pd; >> - } > > > Why was this removed from here? I think it should live in > nvme_rdma_alloc_device. Hi Sagi, The idea was to move this capability check into the event handler nvme_rdma_add_one() itself, i.e. when the event comes, we first check whether the device is capable, and if it is not, we do not take any other branches but return with pr_err(). Device creation, in turn, is called only when we are completely sure that we can create the device, i.e. if an error happens inside nvme_rdma_alloc_device(), it will be something nasty such as -ENOMEM, rather than the more generic -ENOTSUPP. 
Of course, that is only my sense of beauty :) I do not insist and can certainly move that check back. > >> + ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, ndev); > > > This however, should be done in the call-site after this completes > successfully. Well, then nvme_rdma_alloc_device() and its brother nvme_rdma_free_dev() (hm, the brother's name should also end with *ice; I will fix that) won't be so similar. Please take a look: nvme_rdma_free_dev(...) { ... ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL); ... } nvme_rdma_alloc_device(...) { ... ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, ndev); ... } So if _free_dev() does ib_set_client_data(), then for the sake of symmetry *_alloc_device() should do the same. > >> @@ -2017,22 +2004,27 @@ static struct nvmf_transport_ops >> nvme_rdma_transport = { >> .create_ctrl = nvme_rdma_create_ctrl, >> }; >> -static void nvme_rdma_remove_one(struct ib_device *ib_device, void >> *client_data) >> +static void nvme_rdma_add_one(struct ib_device *ib_device) >> { >> - struct nvme_rdma_ctrl *ctrl; >> struct nvme_rdma_device *ndev; >> - bool found = false; >> - mutex_lock(&device_list_mutex); >> - list_for_each_entry(ndev, &device_list, entry) { >> - if (ndev->dev == ib_device) { >> - found = true; >> - break; >> - } >> + if (!(ib_device->attrs.device_cap_flags & >> + IB_DEVICE_MEM_MGT_EXTENSIONS)) { >> + pr_err("Memory registrations not supported.\n"); >> + /* Argh */ >> + return; >> } >> - mutex_unlock(&device_list_mutex); >> + ndev = nvme_rdma_alloc_device(ib_device); >> + /* Argh!! */ >> + WARN_ON(!ndev); > > > NO need for the WARN_ON, either this is a resource allocation problem > which would manifest anyways, or this is a device we are not interested > in using. I suggest we make this a debug log. Yep. 
>> +} >> + >> +static void nvme_rdma_remove_one(struct ib_device *ib_device, void >> *client_data) >> +{ >> + struct nvme_rdma_device *ndev = client_data; >> + struct nvme_rdma_ctrl *ctrl; >> - if (!found) >> + if (unlikely(!ndev)) >> return; >> /* Delete all controllers using this device */ >> @@ -2045,10 +2037,12 @@ static void nvme_rdma_remove_one(struct ib_device >> *ib_device, void *client_data) >> mutex_unlock(&nvme_rdma_ctrl_mutex); >> flush_workqueue(nvme_delete_wq); >> + WARN_ON(!nvme_rdma_dev_put(ndev)); >> } >> static struct ib_client nvme_rdma_ib_client = { >> .name = "nvme_rdma", >> + .add = nvme_rdma_add_one, >> .remove = nvme_rdma_remove_one >> }; > > > Overall I think this is a nice approach :) Then I will prepare another set in a couple of days. -- Roman -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html