On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@xxxxxxxxx> wrote: > > > 在 2023/12/5 13:55, Zhu Yanjun 写道: > > Add David S. Miller and David Ahern. > > > > They are the maintainers in netdev and very familiar with mcast. > > > > Zhu Yanjun > > > > 在 2023/12/5 8:26, Bob Pearson 写道: > >> Currently the rdma_rxe driver does not receive mcast packets at all. > >> > >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister > >> the IP mcast address. This is required for mcast traffic to reach the > >> rxe driver when coming from an external source. > >> > >> Fixes: 8700e3e7c485 ("Soft RoCE driver") > >> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx> > >> --- > >> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++----- > >> drivers/infiniband/sw/rxe/rxe_net.c | 2 +- > >> drivers/infiniband/sw/rxe/rxe_net.h | 1 + > >> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + > >> 4 files changed, 102 insertions(+), 21 deletions(-) > >> > >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c > >> b/drivers/infiniband/sw/rxe/rxe_mcast.c > >> index 86cc2e18a7fd..54735d07cee5 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c > >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c > >> @@ -19,38 +19,116 @@ > >> * mcast packets in the rxe receive path. 
> >> */ > >> +#include <linux/igmp.h> > >> + > >> #include "rxe.h" > >> -/** > >> - * rxe_mcast_add - add multicast address to rxe device > >> - * @rxe: rxe device object > >> - * @mgid: multicast address as a gid > >> - * > >> - * Returns 0 on success else an error > >> - */ > >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) > >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid) > >> { > >> + struct in6_addr *addr6 = (struct in6_addr *)mgid; > >> + struct sock *sk = recv_sockets.sk6->sk; > >> unsigned char ll_addr[ETH_ALEN]; > >> + int err; > >> + > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6); > > > Normally the socket lock is taken with lock_sock()/release_sock(); I am not sure that spin_lock_bh(&sk->sk_lock.slock) is correct here. Note also that rtnl_lock() can sleep, so it must not be called while a spinlock is held with BHs disabled. For comparison, net/ipv6/addrconf.c does: ./net/ipv6/addrconf.c-2915- lock_sock(sk); ./net/ipv6/addrconf.c-2916- if (join) ./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, ifindex, addr); ./net/ipv6/addrconf.c-2918- else ./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, ifindex, addr); ./net/ipv6/addrconf.c-2920- release_sock(sk); Should this be lock_sock() instead? > > Please Jason or experts from netdev comment on this. 
> > Thanks, > > Zhu Yanjun > > > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> + if (err && err != -EADDRINUSE) > >> + goto err_out; > >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >> + err = dev_mc_add(rxe->ndev, ll_addr); > >> + if (err) > >> + goto err_drop; > >> + > >> + return 0; > >> - return dev_mc_add(rxe->ndev, ll_addr); > >> +err_drop: > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6); > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> +err_out: > >> + return err; > >> } > >> -/** > >> - * rxe_mcast_del - delete multicast address from rxe device > >> - * @rxe: rxe device object > >> - * @mgid: multicast address as a gid > >> - * > >> - * Returns 0 on success else an error > >> - */ > >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) > >> +static int rxe_mcast_add(struct rxe_mcg *mcg) > >> { > >> + struct rxe_dev *rxe = mcg->rxe; > >> + union ib_gid *mgid = &mcg->mgid; > >> unsigned char ll_addr[ETH_ALEN]; > >> + struct ip_mreqn imr = {}; > >> + int err; > >> + > >> + if (mcg->is_ipv6) > >> + return rxe_mcast_add6(rxe, mgid); > >> + > >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >> + imr.imr_ifindex = rxe->ndev->ifindex; > >> + rtnl_lock(); > >> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> + if (err && err != -EADDRINUSE) > >> + goto err_out; > >> + > >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >> + err = dev_mc_add(rxe->ndev, ll_addr); > >> + if (err) > >> + goto err_leave; > >> + > >> + return 0; > >> + > >> +err_leave: > >> + rtnl_lock(); > >> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> +err_out: > >> + return err; > >> +} > >> + > >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid) > >> +{ > >> + struct sock *sk = recv_sockets.sk6->sk; > >> + unsigned char ll_addr[ETH_ALEN]; > >> + int err, 
err2; > >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); > >> + err = dev_mc_del(rxe->ndev, ll_addr); > >> + > >> + spin_lock_bh(&sk->sk_lock.slock); > >> + rtnl_lock(); > >> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, > >> + (struct in6_addr *)mgid); > >> + rtnl_unlock(); > >> + spin_unlock_bh(&sk->sk_lock.slock); > >> + > >> + return err ?: err2; > >> +} > >> + > >> +static int rxe_mcast_del(struct rxe_mcg *mcg) > >> +{ > >> + struct rxe_dev *rxe = mcg->rxe; > >> + union ib_gid *mgid = &mcg->mgid; > >> + unsigned char ll_addr[ETH_ALEN]; > >> + struct ip_mreqn imr = {}; > >> + int err, err2; > >> + > >> + if (mcg->is_ipv6) > >> + return rxe_mcast_del6(rxe, mgid); > >> + > >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12); > >> + imr.imr_ifindex = rxe->ndev->ifindex; > >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr); > >> + err = dev_mc_del(rxe->ndev, ll_addr); > >> + > >> + rtnl_lock(); > >> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr); > >> + rtnl_unlock(); > >> - return dev_mc_del(rxe->ndev, ll_addr); > >> + return err ?: err2; > >> } > >> /** > >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, > >> union ib_gid *mgid, > >> { > >> kref_init(&mcg->ref_cnt); > >> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); > >> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid); > >> INIT_LIST_HEAD(&mcg->qp_list); > >> mcg->rxe = rxe; > >> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct > >> rxe_dev *rxe, union ib_gid *mgid) > >> spin_unlock_bh(&rxe->mcg_lock); > >> /* add mcast address outside of lock */ > >> - err = rxe_mcast_add(rxe, mgid); > >> + err = rxe_mcast_add(mcg); > >> if (!err) > >> return mcg; > >> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg) > >> static void rxe_destroy_mcg(struct rxe_mcg *mcg) > >> { > >> /* delete mcast address outside of lock */ > >> - rxe_mcast_del(mcg->rxe, &mcg->mgid); > >> + rxe_mcast_del(mcg); > >> 
spin_lock_bh(&mcg->rxe->mcg_lock); > >> __rxe_destroy_mcg(mcg); > >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c > >> b/drivers/infiniband/sw/rxe/rxe_net.c > >> index 58c3f3759bf0..b481f8da2002 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_net.c > >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c > >> @@ -18,7 +18,7 @@ > >> #include "rxe_net.h" > >> #include "rxe_loc.h" > >> -static struct rxe_recv_sockets recv_sockets; > >> +struct rxe_recv_sockets recv_sockets; > >> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, > >> struct net_device *ndev, > >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h > >> b/drivers/infiniband/sw/rxe/rxe_net.h > >> index 45d80d00f86b..89cee7d5340f 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_net.h > >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h > >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets { > >> struct socket *sk4; > >> struct socket *sk6; > >> }; > >> +extern struct rxe_recv_sockets recv_sockets; > >> int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h > >> b/drivers/infiniband/sw/rxe/rxe_verbs.h > >> index ccb9d19ffe8a..7be9e6232dd9 100644 > >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h > >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h > >> @@ -352,6 +352,7 @@ struct rxe_mcg { > >> atomic_t qp_num; > >> u32 qkey; > >> u16 pkey; > >> + bool is_ipv6; > >> }; > >> struct rxe_mca { >