On Sun, Jul 23, 2023 at 09:03:34PM +0800, Zhu Yanjun wrote: > On Sat, Jul 22, 2023 at 4:51 AM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote: > > > > In cable pull testing some NICs can hold a send packet long enough > > to allow ulp protocol stacks to destroy the qp and the cleanup > > routines to timeout waiting for all qp references to be released. > > When the NIC driver finally frees the SKB the qp pointer is no longer > > valid and causes a seg fault in rxe_skb_tx_dtor(). > > If a packet is held in some NICs, the later packets of this packet > will also be held by this NIC. So this will cause QP not to work well. > This is a serious problem. And the problem should be fixed in this > kind of NICs. We should not fix it in RXE. > > And a QP exists for at least several seconds. A packet can be held in > NIC for such a long time? This problem does exist in the real world, > or you imagine this scenario? > > I can not imagine this kind of scenario. Please Jason or Leon also check this. I stopped to follow RXE changes for a long time. They don't make any sense to me. Even this patch, which moves existing IB reference counter from one place to another and does it for every SKB is beyond my understanding. Thanks > > Thanks. > Zhu Yanjun > > > > This patch passes the qp index instead of the qp to the skb destructor > > callback function. The call back is required to lookup the qp from the > > index and if it has been destroyed the lookup will return NULL and the > > qp will not be referenced avoiding the seg fault. > > > > Fixes: 8700e3e7c485 ("Soft RoCE driver") > > Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx> > > --- > > drivers/infiniband/sw/rxe/rxe_net.c | 87 ++++++++++++++++++++++------- > > drivers/infiniband/sw/rxe/rxe_qp.c | 1 - > > 2 files changed, 67 insertions(+), 21 deletions(-) > > > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > > index cd59666158b1..10e4a752ff7c 100644 > > --- a/drivers/infiniband/sw/rxe/rxe_net.c > > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > > @@ -131,19 +131,28 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, > > return dst; > > } > > > > +static struct rxe_dev *get_rxe_from_skb(struct sk_buff *skb) > > +{ > > + struct rxe_dev *rxe; > > + struct net_device *ndev = skb->dev; > > + > > + rxe = rxe_get_dev_from_net(ndev); > > + if (!rxe && is_vlan_dev(ndev)) > > + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); > > + > > + return rxe; > > +} > > + > > static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) > > { > > struct udphdr *udph; > > struct rxe_dev *rxe; > > - struct net_device *ndev = skb->dev; > > struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); > > > > /* takes a reference on rxe->ib_dev > > * drop when skb is freed > > */ > > - rxe = rxe_get_dev_from_net(ndev); > > - if (!rxe && is_vlan_dev(ndev)) > > - rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); > > + rxe = get_rxe_from_skb(skb); > > if (!rxe) > > goto drop; > > > > @@ -345,46 +354,84 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, > > > > static void rxe_skb_tx_dtor(struct sk_buff *skb) > > { > > - struct sock *sk = skb->sk; > > - struct rxe_qp *qp = sk->sk_user_data; > > - int skb_out = atomic_dec_return(&qp->skb_out); > > + struct rxe_dev *rxe; > > + unsigned int index; > > + struct rxe_qp *qp; > > + int skb_out; > > + > > + /* takes a ref on ib device if success */ > > + rxe = get_rxe_from_skb(skb); > > + if (!rxe) > > + goto out; > > + > > + /* recover source qp index from sk->sk_user_data > > + * free the reference taken in rxe_send > > + */ > > + index = (int)(uintptr_t)skb->sk->sk_user_data; > > + sock_put(skb->sk); > > > > + /* lookup qp from index, takes a ref on success */ > > + qp = rxe_pool_get_index(&rxe->qp_pool, index); > > + if (!qp) > > + goto out_put_ibdev; > > + > > + skb_out = atomic_dec_return(&qp->skb_out); > > if (unlikely(qp->need_req_skb && > > skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) > > rxe_sched_task(&qp->req.task); > > > > rxe_put(qp); > > +out_put_ibdev: > > + ib_device_put(&rxe->ib_dev); > > +out: > > + return; > > } > > > > static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) > > { > > + struct rxe_qp *qp = pkt->qp; > > + struct sock *sk; > > int err; > > > > - skb->destructor = rxe_skb_tx_dtor; > > - skb->sk = pkt->qp->sk->sk; > > + /* qp can be destroyed while this packet is waiting on > > + * the tx queue. So need to protect sk. > > + */ > > + sk = qp->sk->sk; > > + sock_hold(sk); > > + skb->sk = sk; > > > > - rxe_get(pkt->qp); > > atomic_inc(&pkt->qp->skb_out); > > > > + sk->sk_user_data = (void *)(long)qp->elem.index; > > + skb->destructor = rxe_skb_tx_dtor; > > + > > if (skb->protocol == htons(ETH_P_IP)) { > > - err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); > > + err = ip_local_out(dev_net(skb_dst(skb)->dev), sk, skb); > > } else if (skb->protocol == htons(ETH_P_IPV6)) { > > - err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); > > + err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); > > } else { > > - rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n", > > - skb->protocol); > > - atomic_dec(&pkt->qp->skb_out); > > - rxe_put(pkt->qp); > > - kfree_skb(skb); > > - return -EINVAL; > > + rxe_dbg_qp(qp, "Unknown layer 3 protocol: %d", > > + skb->protocol); > > + err = -EINVAL; > > + goto err_not_sent; > > } > > > > + /* IP consumed the packet, the destructor will handle cleanup */ > > if (unlikely(net_xmit_eval(err))) { > > - rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err); > > - return -EAGAIN; > > + rxe_dbg_qp(qp, "Error sending packet: %d", err); > > + err = -EAGAIN; > > + goto err_out; > > } > > > > return 0; > > + > > +err_not_sent: > > + skb->destructor = NULL; > > + atomic_dec(&pkt->qp->skb_out); > > + kfree_skb(skb); > > + sock_put(sk); > > +err_out: > > + return err; > > } > > > > /* fix up a send packet to match the packets > > diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c > > index a569b111a9d2..dcbf71031453 100644 > > --- a/drivers/infiniband/sw/rxe/rxe_qp.c > > +++ b/drivers/infiniband/sw/rxe/rxe_qp.c > > @@ -194,7 +194,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, > > err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); > > if (err < 0) > > return err; > > - qp->sk->sk->sk_user_data = qp; > > > > /* pick a source UDP port number for this QP based on > > * the source QPN. this spreads traffic for different QPs > > -- > > 2.39.2 > >