Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 12/6/23 19:47, Rain River wrote:
On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@xxxxxxxxx> wrote:


在 2023/12/5 13:55, Zhu Yanjun 写道:
Adding David S. Miller and David Ahern.

They are netdev maintainers and are very familiar with mcast.

Zhu Yanjun

在 2023/12/5 8:26, Bob Pearson 写道:
Currently the rdma_rxe driver does not receive mcast packets at all.

Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
the IP mcast address. This is required for mcast traffic to reach the
rxe driver when coming from an external source.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
---
   drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
   drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
   drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
   drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
   4 files changed, 102 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 86cc2e18a7fd..54735d07cee5 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -19,38 +19,116 @@
    * mcast packets in the rxe receive path.
    */
   +#include <linux/igmp.h>
+
   #include "rxe.h"
   -/**
- * rxe_mcast_add - add multicast address to rxe device
- * @rxe: rxe device object
- * @mgid: multicast address as a gid
- *
- * Returns 0 on success else an error
- */
-static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
   {
+    struct in6_addr *addr6 = (struct in6_addr *)mgid;
+    struct sock *sk = recv_sockets.sk6->sk;
       unsigned char ll_addr[ETH_ALEN];
+    int err;
+
+    spin_lock_bh(&sk->sk_lock.slock);
+    rtnl_lock();
+    err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);


Normally the socket lock is taken with lock_sock(), as in the addrconf.c excerpt below. I am not sure whether spin_lock_bh() on sk_lock.slock is correct here.

./net/ipv6/addrconf.c-2915-     lock_sock(sk);
./net/ipv6/addrconf.c-2916-     if (join)
./net/ipv6/addrconf.c:2917:             ret = ipv6_sock_mc_join(sk, ifindex, addr);
./net/ipv6/addrconf.c-2918-     else
./net/ipv6/addrconf.c-2919-             ret = ipv6_sock_mc_drop(sk, ifindex, addr);
./net/ipv6/addrconf.c-2920-     release_sock(sk);

Should this be lock_sock() instead?

It works as well as spin_lock_bh() in preventing the RCU splat and
looks like the preferred way. I'll make this change.

Bob


Jason, or any netdev experts, please comment on this.

Thanks,

Zhu Yanjun


+    rtnl_unlock();
+    spin_unlock_bh(&sk->sk_lock.slock);
+    if (err && err != -EADDRINUSE)
+        goto err_out;
         ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+    err = dev_mc_add(rxe->ndev, ll_addr);
+    if (err)
+        goto err_drop;
+
+    return 0;
   -    return dev_mc_add(rxe->ndev, ll_addr);
+err_drop:
+    spin_lock_bh(&sk->sk_lock.slock);
+    rtnl_lock();
+    ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
+    rtnl_unlock();
+    spin_unlock_bh(&sk->sk_lock.slock);
+err_out:
+    return err;
   }
   -/**
- * rxe_mcast_del - delete multicast address from rxe device
- * @rxe: rxe device object
- * @mgid: multicast address as a gid
- *
- * Returns 0 on success else an error
- */
-static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_add(struct rxe_mcg *mcg)
   {
+    struct rxe_dev *rxe = mcg->rxe;
+    union ib_gid *mgid = &mcg->mgid;
       unsigned char ll_addr[ETH_ALEN];
+    struct ip_mreqn imr = {};
+    int err;
+
+    if (mcg->is_ipv6)
+        return rxe_mcast_add6(rxe, mgid);
+
+    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
+    imr.imr_ifindex = rxe->ndev->ifindex;
+    rtnl_lock();
+    err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
+    rtnl_unlock();
+    if (err && err != -EADDRINUSE)
+        goto err_out;
+
+    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
+    err = dev_mc_add(rxe->ndev, ll_addr);
+    if (err)
+        goto err_leave;
+
+    return 0;
+
+err_leave:
+    rtnl_lock();
+    ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
+    rtnl_unlock();
+err_out:
+    return err;
+}
+
+static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
+{
+    struct sock *sk = recv_sockets.sk6->sk;
+    unsigned char ll_addr[ETH_ALEN];
+    int err, err2;
         ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
+    err = dev_mc_del(rxe->ndev, ll_addr);
+
+    spin_lock_bh(&sk->sk_lock.slock);
+    rtnl_lock();
+    err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
+            (struct in6_addr *)mgid);
+    rtnl_unlock();
+    spin_unlock_bh(&sk->sk_lock.slock);
+
+    return err ?: err2;
+}
+
+static int rxe_mcast_del(struct rxe_mcg *mcg)
+{
+    struct rxe_dev *rxe = mcg->rxe;
+    union ib_gid *mgid = &mcg->mgid;
+    unsigned char ll_addr[ETH_ALEN];
+    struct ip_mreqn imr = {};
+    int err, err2;
+
+    if (mcg->is_ipv6)
+        return rxe_mcast_del6(rxe, mgid);
+
+    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
+    imr.imr_ifindex = rxe->ndev->ifindex;
+    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
+    err = dev_mc_del(rxe->ndev, ll_addr);
+
+    rtnl_lock();
+    err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
+    rtnl_unlock();
   -    return dev_mc_del(rxe->ndev, ll_addr);
+    return err ?: err2;
   }
     /**
@@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
union ib_gid *mgid,
   {
       kref_init(&mcg->ref_cnt);
       memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
+    mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
       INIT_LIST_HEAD(&mcg->qp_list);
       mcg->rxe = rxe;
   @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
rxe_dev *rxe, union ib_gid *mgid)
       spin_unlock_bh(&rxe->mcg_lock);
         /* add mcast address outside of lock */
-    err = rxe_mcast_add(rxe, mgid);
+    err = rxe_mcast_add(mcg);
       if (!err)
           return mcg;
   @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
   static void rxe_destroy_mcg(struct rxe_mcg *mcg)
   {
       /* delete mcast address outside of lock */
-    rxe_mcast_del(mcg->rxe, &mcg->mgid);
+    rxe_mcast_del(mcg);
         spin_lock_bh(&mcg->rxe->mcg_lock);
       __rxe_destroy_mcg(mcg);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
b/drivers/infiniband/sw/rxe/rxe_net.c
index 58c3f3759bf0..b481f8da2002 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -18,7 +18,7 @@
   #include "rxe_net.h"
   #include "rxe_loc.h"
   -static struct rxe_recv_sockets recv_sockets;
+struct rxe_recv_sockets recv_sockets;
     static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
                        struct net_device *ndev,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
b/drivers/infiniband/sw/rxe/rxe_net.h
index 45d80d00f86b..89cee7d5340f 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -15,6 +15,7 @@ struct rxe_recv_sockets {
       struct socket *sk4;
       struct socket *sk6;
   };
+extern struct rxe_recv_sockets recv_sockets;
     int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
   diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
b/drivers/infiniband/sw/rxe/rxe_verbs.h
index ccb9d19ffe8a..7be9e6232dd9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -352,6 +352,7 @@ struct rxe_mcg {
       atomic_t        qp_num;
       u32            qkey;
       u16            pkey;
+    bool            is_ipv6;
   };
     struct rxe_mca {





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux