Re: [PATCH rdma-core] Add Red Hat's ibdev2netdev helper

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 12/11/2018 21:27, Leon Romanovsky wrote:
> On Tue, Dec 11, 2018 at 04:53:49PM -0500, Jarod Wilson wrote:
>> On 2018-12-11 4:17 PM, Parav Pandit wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: linux-rdma-owner@xxxxxxxxxxxxxxx <linux-rdma-
>>>> owner@xxxxxxxxxxxxxxx> On Behalf Of Mark Bloch
>>>> Sent: Tuesday, December 11, 2018 1:57 PM
>>>> To: Jarod Wilson <jarod@xxxxxxxxxx>; linux-rdma@xxxxxxxxxxxxxxx
>>>> Cc: Doug Ledford <dledford@xxxxxxxxxx>; Honggang Li <honli@xxxxxxxxxx>
>>>> Subject: Re: [PATCH rdma-core] Add Red Hat's ibdev2netdev helper
>>>>
>>>>
>>>>
>>>> On 12/11/18 11:46 AM, Jarod Wilson wrote:
>>>>> This is a helper script that Red Hat had previously been including in
>>>>> it's ibutils package, which is no more, with the retirement of ibutils
>>>>> and
>>>>> ibutils2 being proprietary to Mellanox now. This script still has use,
>>>>> and needs a better home, so we'd like to add it to rdma-core's redhat/
>>>>> directory -- if not somewhere more generic, should other distros wish
>>>>> to make use of it as well.
>>>>
>>>> Why not tell users to use rdma tool (iproute2)?
>>>>
>>> Because rdma tool currently doesn’t show corresponding IPoIB upper netdevice(s) of for the IB device.
>>> For a given rdma device there can be multiple netdevices exist for RoCE ports.
>>> Rdma tool (and ibdev2netdev) doesn't show them.
>>> The scope of ibdev2netdev currently is wider than what rdma tool shows.
>>> So yes, its good recommendation to RoCE and representor users.
>>
>> We've actually had some folks notice it was missing from RHEL-8.0 beta, and
>> explicitly request it, because they have various infrastructure that was
>> expecting it to be there. It's just a shell script, and could be updated
>> along the way to use the rdma tool from iproute2, or become completely
>> obsolete at some point. But we're definitely some demand to keep it around
>> for the moment.
> 
> It is already obsolete and rdmatool replaces it. I see no value in
> keeping abandoned tool in upstream repository.
> 
> Regarding Parav's point, lack of IPoIB translations, iroute2 is an open-source
> project and people are encouraged to extend it. If I judge by number of
> customer requests to me (zero) to implement it, there is no demand in such
> functionality.

Well, a quick prototype follows below, the question is should I work on it
or not, does anyone core?

output can look something like this:

$ ./rdma/rdma link -d show mlx5_3
4/1: mlx5_3/1: subnet_prefix fe80:0000:0000:0000 lid 65535 sm_lid 0 lmc 0 state DOWN physical_state DISABLED
    0: netdev ib0 netdev_index 35
    1: netdev ib0.3333 netdev_index 36
    2: netdev ib0.3332 netdev_index 37
    caps: <TRAP, SL_MAP, SYS_IMAGE_GUID, CABLE_INFO, EXTENDED_SPEEDS, CAP_MASK2, CM, VENDOR_CLASS, CAP_MASK_NOTICE, CLIENT_REG, OTHER_LOCAL_CHANGES, MULT_PKER_TRAP>

Code (kernel side only, quick hack):

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cc7535c5e192..cfdbe6ede841 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -116,6 +116,10 @@ int ib_cache_setup_one(struct ib_device *device);
 void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);

+int ib_dump_net_devs(struct ib_device *dev, u32 port,
+                    int (*func)(struct net_device *, void *),
+                    void *ctx);
+
 #ifdef CONFIG_CGROUP_RDMA
 int ib_device_register_rdmacg(struct ib_device *device);
 void ib_device_unregister_rdmacg(struct ib_device *device);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 348a7fb1f945..ee5eacb49821 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1174,6 +1174,38 @@ int ib_find_pkey(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_find_pkey);

+int ib_dump_net_devs(struct ib_device *dev, u32 port,
+                    int (*func)(struct net_device *, void *),
+                    void *ctx)
+{
+       struct ib_client_data *context;
+       int ret = 0;
+
+       if (!rdma_protocol_ib(dev, port))
+               return 0;
+
+       down_read(&lists_rwsem);
+
+       list_for_each_entry(context, &dev->client_data_list, list) {
+               struct ib_client *client = context->client;
+
+               if (context->going_down)
+                       continue;
+
+               if (client->dump_net_devs) {
+                       ret = client->dump_net_devs(dev, port, context->data,
+                                                   func, ctx);
+                       if (ret)
+                               goto out;
+               }
+       }
+
+out:
+       up_read(&lists_rwsem);
+
+       return ret;
+}
+
 /**
  * ib_get_net_dev_by_params() - Return the appropriate net_dev
  * for a received CM request
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 9abbadb9e366..367205544930 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -107,6 +107,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_ATTR_DRIVER_U32]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_DRIVER_S64]            = { .type = NLA_S64 },
        [RDMA_NLDEV_ATTR_DRIVER_U64]            = { .type = NLA_U64 },
+       [RDMA_NLDEV_ATTR_NDEV_EXT]              = { .type = NLA_NESTED},
+       [RDMA_NLDEV_ATTR_NDEV_EXT_ENTRY]        = { .type = NLA_NESTED},
 };

 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -220,11 +222,43 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
        return 0;
 }

+struct dump_netdevs {
+       const struct net *net;
+       struct sk_buff *msg;
+};
+
+static int fill_port_net_dev_ext(struct net_device *netdev,
+                                void *ptr)
+{
+       struct dump_netdevs *ctx = ptr;
+       struct nlattr *entry_attr;
+       int ret;
+
+       if (netdev && net_eq(dev_net(netdev), ctx->net)) {
+               entry_attr = nla_nest_start(ctx->msg,
+                                           RDMA_NLDEV_ATTR_NDEV_EXT_ENTRY);
+               ret = nla_put_string(ctx->msg, RDMA_NLDEV_ATTR_NDEV_NAME,
+                                    netdev->name);
+               if (ret)
+                       return -ENOMEM;
+
+               ret = nla_put_u32(ctx->msg,
+                                 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+               if (ret)
+                       return -ENOMEM;
+               nla_nest_end(ctx->msg, entry_attr);
+       }
+
+       return 0;
+}
+
 static int fill_port_info(struct sk_buff *msg,
                          struct ib_device *device, u32 port,
                          const struct net *net)
 {
+       struct dump_netdevs ctx = {.net = net, .msg = msg};
        struct net_device *netdev = NULL;
+       struct nlattr *entry_attr;
        struct ib_port_attr attr;
        int ret;

@@ -271,6 +305,12 @@ static int fill_port_info(struct sk_buff *msg,
                                     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
        }

+       entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_NDEV_EXT);
+       if (!entry_attr)
+               return -EMSGSIZE;
+
+       ret = ib_dump_net_devs(device, port, fill_port_net_dev_ext, &ctx);
+       nla_nest_end(msg, entry_attr);
 out:
        if (netdev)
                dev_put(netdev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8710214594d8..cf39ec8e7e75 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -97,6 +97,10 @@ static struct net_device *ipoib_get_net_dev_by_params(
                struct ib_device *dev, u8 port, u16 pkey,
                const union ib_gid *gid, const struct sockaddr *addr,
                void *client_data);
+static int ipoib_dump_net_devs(struct ib_device *dev, u8 port,
+                              void *client_data,
+                              int (*func)(struct net_device *, void *),
+                              void *ctx);
 static int ipoib_set_mac(struct net_device *dev, void *addr);
 static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
                       int cmd);
@@ -106,6 +110,7 @@ static struct ib_client ipoib_client = {
        .add    = ipoib_add_one,
        .remove = ipoib_remove_one,
        .get_net_dev_by_params = ipoib_get_net_dev_by_params,
+       .dump_net_devs = ipoib_dump_net_devs,
 };

 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
@@ -465,6 +470,63 @@ static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
        return matches;
 }

+static int __ipoib_dump_net_devs(struct ipoib_dev_priv *priv,
+                                int nesting,
+                                int (*func)(struct net_device *, void *),
+                                void *ctx)
+{
+       struct ipoib_dev_priv *child_priv;
+       struct net_device *net_dev = NULL;
+       int ret = 0;
+
+       net_dev = ipoib_get_master_net_dev(priv->dev);
+
+       if (net_dev) {
+               ret = func(net_dev, ctx);
+               dev_put(net_dev);
+       }
+
+       if (ret)
+               return ret;
+
+       down_read_nested(&priv->vlan_rwsem, nesting);
+       list_for_each_entry(child_priv, &priv->child_intfs, list) {
+               ret = __ipoib_dump_net_devs(child_priv, nesting + 1,
+                                           func, ctx);
+               if (ret)
+                       return ret;
+       }
+       up_read(&priv->vlan_rwsem);
+
+       return ret;
+}
+
+static int ipoib_dump_net_devs(struct ib_device *dev, u8 port,
+                              void *client_data,
+                              int (*func)(struct net_device *, void *),
+                              void *ctx)
+{
+       struct list_head *dev_list = client_data;
+       struct ipoib_dev_priv *priv;
+       int ret = 0;
+
+       if (!rdma_protocol_ib(dev, port))
+               return -EINVAL;
+
+       if (!dev_list)
+               return -EINVAL;
+
+       list_for_each_entry(priv, dev_list, list) {
+               if (priv->port != port)
+                       continue;
+               ret = __ipoib_dump_net_devs(priv, 0, func, ctx);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
+}
+
 static struct net_device *ipoib_get_net_dev_by_params(
                struct ib_device *dev, u8 port, u16 pkey,
                const union ib_gid *gid, const struct sockaddr *addr,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 85021451eee0..4481d7d627ee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2618,6 +2618,12 @@ struct ib_client {
        char  *name;
        void (*add)   (struct ib_device *);
        void (*remove)(struct ib_device *, void *client_data);
+       int (*dump_net_devs)(struct ib_device *dev,
+                            u8 port,
+                            void *client_data,
+                            int (*func)(struct net_device *,
+                                        void *),
+                            void *ctx);

        /* Returns the net_dev belonging to this ib_client and matching the
         * given parameters.
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f9c41bf59efc..52c92c0baa60 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -427,6 +427,13 @@ enum rdma_nldev_attr {
        RDMA_NLDEV_ATTR_DRIVER_S64,             /* s64 */
        RDMA_NLDEV_ATTR_DRIVER_U64,             /* u64 */

+       /*
+        * Provide extanded netdevices that depend on the rdma device
+        * for example IPoIB
+        */
+       RDMA_NLDEV_ATTR_NDEV_EXT,               /* nested_table */
+       RDMA_NLDEV_ATTR_NDEV_EXT_ENTRY,         /* nested_table */
+
        /*
         * Always the end
         */


> 
> Thanks
> 
>>
>> --
>> Jarod Wilson
>> jarod@xxxxxxxxxx




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux