Re: [RFC 1/5] nvme-rdma: use ib_client API to wrap ib_device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, May 31, 2018 at 11:09 AM, Sagi Grimberg <sagi@xxxxxxxxxxx> wrote:
>
>
> On 05/15/2018 06:15 PM, Roman Pen wrote:
>>
>> ib_client API provides a way to wrap an ib_device with a specific ULP
>> structure.  Using that API local lists and mutexes can be completely
>> avoided and allocation/removal paths become a bit cleaner.
>>
>> Signed-off-by: Roman Pen <roman.penyaev@xxxxxxxxxxxxxxx>
>> Cc: Christoph Hellwig <hch@xxxxxx>
>> Cc: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx>
>> Cc: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
>> Cc: Sagi Grimberg <sagi@xxxxxxxxxxx>
>> Cc: Doug Ledford <dledford@xxxxxxxxxx>
>> ---
>>   drivers/nvme/host/rdma.c | 82
>> ++++++++++++++++++++++--------------------------
>>   1 file changed, 38 insertions(+), 44 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 1eb4438a8763..f003c8d77846 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -46,7 +46,6 @@ struct nvme_rdma_device {
>>         struct ib_device        *dev;
>>         struct ib_pd            *pd;
>>         struct kref             ref;
>> -       struct list_head        entry;
>>   };
>>     struct nvme_rdma_qe {
>> @@ -124,9 +123,7 @@ static inline struct nvme_rdma_ctrl
>> *to_rdma_ctrl(struct nvme_ctrl *ctrl)
>>         return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
>>   }
>>   -static LIST_HEAD(device_list);
>> -static DEFINE_MUTEX(device_list_mutex);
>> -
>> +static struct ib_client nvme_rdma_ib_client;
>>   static LIST_HEAD(nvme_rdma_ctrl_list);
>>   static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
>>   @@ -325,17 +322,14 @@ static void nvme_rdma_free_dev(struct kref *ref)
>>         struct nvme_rdma_device *ndev =
>>                 container_of(ref, struct nvme_rdma_device, ref);
>>   -     mutex_lock(&device_list_mutex);
>> -       list_del(&ndev->entry);
>> -       mutex_unlock(&device_list_mutex);
>> -
>> +       ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
>>         ib_dealloc_pd(ndev->pd);
>>         kfree(ndev);
>>   }
>>   -static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>> +static int nvme_rdma_dev_put(struct nvme_rdma_device *dev)
>>   {
>> -       kref_put(&dev->ref, nvme_rdma_free_dev);
>> +       return kref_put(&dev->ref, nvme_rdma_free_dev);
>>   }
>>     static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>> @@ -348,43 +342,36 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
>>   {
>>         struct nvme_rdma_device *ndev;
>>   -     mutex_lock(&device_list_mutex);
>> -       list_for_each_entry(ndev, &device_list, entry) {
>> -               if (ndev->dev->node_guid == cm_id->device->node_guid &&
>> -                   nvme_rdma_dev_get(ndev))
>> -                       goto out_unlock;
>> -       }
>> +       ndev = ib_get_client_data(cm_id->device, &nvme_rdma_ib_client);
>> +       if (ndev && WARN_ON(!nvme_rdma_dev_get(ndev)))
>> +               ndev = NULL;
>> +
>> +       return ndev;
>> +}
>> +
>> +static struct nvme_rdma_device *
>> +nvme_rdma_alloc_device(struct ib_device *device)
>> +{
>> +       struct nvme_rdma_device *ndev;
>>         ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
>> -       if (!ndev)
>> -               goto out_err;
>> +       if (unlikely(!ndev))
>> +               return NULL;
>>   -     ndev->dev = cm_id->device;
>> +       ndev->dev = device;
>>         kref_init(&ndev->ref);
>>         ndev->pd = ib_alloc_pd(ndev->dev,
>>                 register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
>> -       if (IS_ERR(ndev->pd))
>> +       if (unlikely(IS_ERR(ndev->pd)))
>>                 goto out_free_dev;
>>   -     if (!(ndev->dev->attrs.device_cap_flags &
>> -             IB_DEVICE_MEM_MGT_EXTENSIONS)) {
>> -               dev_err(&ndev->dev->dev,
>> -                       "Memory registrations not supported.\n");
>> -               goto out_free_pd;
>> -       }
>
>
> Why was this removed from here? I think it should live in
> nvme_rdma_alloc_device.

Hi Sagi,

The idea was to move this capability check right into the event
handler nvme_rdma_add_one(), i.e. when the event comes, we first check
whether the device is capable, and if it is not, we do not take any
other branches but return with pr_err().

Device creation is then called only when we are completely sure that
we can create the device, i.e. if an error happens inside
nvme_rdma_alloc_device(), it will be something nasty, e.g. -ENOMEM,
rather than the more generic -ENOTSUPP.

Of course, that is only my sense of beauty :)  I do not insist
and can certainly move that check back.

>
>> +       ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, ndev);
>
>
> This however, should be done in the call-site after this completes
> successfully.

Well, then nvme_rdma_alloc_device() and its brother nvme_rdma_free_dev()
(hm, the brother's name should also end with *ice; I will fix that) won't
be so similar. Please take a look:

nvme_rdma_free_dev(...)
{
     ...
     ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, NULL);
     ...
}

nvme_rdma_alloc_device(...)
{
     ...
     ib_set_client_data(ndev->dev, &nvme_rdma_ib_client, ndev);
     ...
}

So if _free_dev() does ib_set_client_data(), then for the sake
of symmetry *_alloc_device() does the same.

>
>> @@ -2017,22 +2004,27 @@ static struct nvmf_transport_ops
>> nvme_rdma_transport = {
>>         .create_ctrl    = nvme_rdma_create_ctrl,
>>   };
>>   -static void nvme_rdma_remove_one(struct ib_device *ib_device, void
>> *client_data)
>> +static void nvme_rdma_add_one(struct ib_device *ib_device)
>>   {
>> -       struct nvme_rdma_ctrl *ctrl;
>>         struct nvme_rdma_device *ndev;
>> -       bool found = false;
>>   -     mutex_lock(&device_list_mutex);
>> -       list_for_each_entry(ndev, &device_list, entry) {
>> -               if (ndev->dev == ib_device) {
>> -                       found = true;
>> -                       break;
>> -               }
>> +       if (!(ib_device->attrs.device_cap_flags &
>> +             IB_DEVICE_MEM_MGT_EXTENSIONS)) {
>> +               pr_err("Memory registrations not supported.\n");
>> +               /* Argh */
>> +               return;
>>         }
>> -       mutex_unlock(&device_list_mutex);
>> +       ndev = nvme_rdma_alloc_device(ib_device);
>> +       /* Argh!! */
>> +       WARN_ON(!ndev);
>
>
> NO need for the WARN_ON, either this is a resource allocation problem
> which would manifest anyways, or this is a device we are not interested
> in using. I suggest we make this a debug log.

Yep.


>> +}
>> +
>> +static void nvme_rdma_remove_one(struct ib_device *ib_device, void
>> *client_data)
>> +{
>> +       struct nvme_rdma_device *ndev = client_data;
>> +       struct nvme_rdma_ctrl *ctrl;
>>   -     if (!found)
>> +       if (unlikely(!ndev))
>>                 return;
>>         /* Delete all controllers using this device */
>> @@ -2045,10 +2037,12 @@ static void nvme_rdma_remove_one(struct ib_device
>> *ib_device, void *client_data)
>>         mutex_unlock(&nvme_rdma_ctrl_mutex);
>>         flush_workqueue(nvme_delete_wq);
>> +       WARN_ON(!nvme_rdma_dev_put(ndev));
>>   }
>>     static struct ib_client nvme_rdma_ib_client = {
>>         .name   = "nvme_rdma",
>> +       .add    = nvme_rdma_add_one,
>>         .remove = nvme_rdma_remove_one
>>   };
>
>
> Overall I think this is a nice approach :)

Then I will prepare another set in a couple of days.

--
Roman
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux