On 4/28/23 04:39, Zhu Yanjun wrote: > From: Zhu Yanjun <yanjun.zhu@xxxxxxxxx> > > When running "rdma link del" command, dellink function will be called. > If the sock refcnt is greater than the refcnt needed for udp tunnel, > the sock refcnt will be decreased by 1. > > If equal, the last rdma link is removed. The udp tunnel will be > destroyed. > > Tested-by: Rain River <rain.1986.08.12@xxxxxxxxx> > Signed-off-by: Zhu Yanjun <yanjun.zhu@xxxxxxxxx> > --- > drivers/infiniband/sw/rxe/rxe.c | 12 +++++++++++- > drivers/infiniband/sw/rxe/rxe_net.c | 17 +++++++++++++++-- > drivers/infiniband/sw/rxe/rxe_net.h | 1 + > 3 files changed, 27 insertions(+), 3 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c > index 0ce6adb43cfc..ebfabc6d6b76 100644 > --- a/drivers/infiniband/sw/rxe/rxe.c > +++ b/drivers/infiniband/sw/rxe/rxe.c > @@ -166,10 +166,12 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) > /* called by ifc layer to create new rxe device. > * The caller should allocate memory for rxe by calling ib_alloc_device. 
> */ > +static struct rdma_link_ops rxe_link_ops; > int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) > { > rxe_init(rxe); > rxe_set_mtu(rxe, mtu); > + rxe->ib_dev.link_ops = &rxe_link_ops; > > return rxe_register_device(rxe, ibdev_name); > } > @@ -206,9 +208,17 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) > return err; > } > > -struct rdma_link_ops rxe_link_ops = { > +static int rxe_dellink(struct ib_device *dev) > +{ > + rxe_net_del(dev); > + > + return 0; > +} > + > +static struct rdma_link_ops rxe_link_ops = { > .type = "rxe", > .newlink = rxe_newlink, > + .dellink = rxe_dellink, > }; > > static int __init rxe_module_init(void) > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 3ca92e062800..4cc7de7b115b 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -530,6 +530,21 @@ int rxe_net_add(const char *ibdev_name, struct net_device *ndev) > return 0; > } > > +#define SK_REF_FOR_TUNNEL 2 > +void rxe_net_del(struct ib_device *dev) > +{ > + if (refcount_read(&recv_sockets.sk6->sk->sk_refcnt) > SK_REF_FOR_TUNNEL) > + __sock_put(recv_sockets.sk6->sk); > + else > + rxe_release_udp_tunnel(recv_sockets.sk6); > + > + if (refcount_read(&recv_sockets.sk4->sk->sk_refcnt) > SK_REF_FOR_TUNNEL) > + __sock_put(recv_sockets.sk4->sk); > + else > + rxe_release_udp_tunnel(recv_sockets.sk4); > +} > +#undef SK_REF_FOR_TUNNEL > + > static void rxe_port_event(struct rxe_dev *rxe, > enum ib_event_type event) > { > @@ -689,8 +704,6 @@ int rxe_register_notifier(void) > > void rxe_net_exit(void) > { > - rxe_release_udp_tunnel(recv_sockets.sk6); > - rxe_release_udp_tunnel(recv_sockets.sk4); > unregister_netdevice_notifier(&rxe_net_notifier); > } These calls are moved to rxe_net_del(), which is only called by an explicit unlink command. But if rxe_net_init() fails and returns an error code, rxe_net_del() will never be called, so the UDP tunnel sockets that were previously released in rxe_net_exit() will be leaked. 
Bob > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h > index a222c3eeae12..f48f22f3353b 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.h > +++ b/drivers/infiniband/sw/rxe/rxe_net.h > @@ -17,6 +17,7 @@ struct rxe_recv_sockets { > }; > > int rxe_net_add(const char *ibdev_name, struct net_device *ndev); > +void rxe_net_del(struct ib_device *dev); > > int rxe_register_notifier(void); > int rxe_net_init(void);