On Fri, Mar 5, 2021 at 3:23 AM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote: > > From: Bob Pearson <rpearsonhpe@xxxxxxxxx> > > Three errors occurred in the fix referenced below. > > 1) rxe_rcv_mcast_pkt() dropped a reference to ib_device when > no error occurred causing an underflow on the reference counter. > This code is cleaned up to be clearer and easier to read. > > 2) Extending the reference taken by rxe_get_dev_from_net() in > rxe_udp_encap_recv() until each skb is freed was not matched by > a reference in the loopback path resulting in underflows. > > 3) In rxe_comp.c in rxe_completer() the function free_pkt() did > not clear skb which triggered a warning at done: and could possibly > at exit: . The WARN_ONCE() calls are not actually needed. > The call to free_pkt() is moved to the end to clearly show that > all skbs are freed. Where do these not-freed skb come from? Except these skb, are other resources freed correctly? IMHO, the root cause should be found and other resources should be also handled. WARN_ON_ONCE() should be kept on exit and done. So if some skb are not freed incorrectly, we can find these skb in the loop end. Zhu Yanjun > > This patch fixes these errors. > > Fixes: 899aba891cab ("RDMA/rxe: Fix FIXME in rxe_udp_encap_recv()") > Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx> > --- > Version 3: > V2 of this patch had spelling errors and style issues which are > fixed in this version. > > Version 2: > v1 of this patch incorrectly added a WARN_ON_ONCE in rxe_completer > where it could be triggered for normal traffic. This version > replaced that with a pr_warn located correctly. > > v1 of this patch placed a call to kfree_skb in an if statement > that could trigger style warnings. This version cleans that up. > > drivers/infiniband/sw/rxe/rxe_comp.c | 55 +++++++++++--------------- > drivers/infiniband/sw/rxe/rxe_net.c | 10 ++++- > drivers/infiniband/sw/rxe/rxe_recv.c | 59 +++++++++++++++++----------- > 3 files changed, 67 insertions(+), 57 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c > index a8ac791a1bb9..17a361b8dbb1 100644 > --- a/drivers/infiniband/sw/rxe/rxe_comp.c > +++ b/drivers/infiniband/sw/rxe/rxe_comp.c > @@ -547,6 +547,7 @@ int rxe_completer(void *arg) > struct sk_buff *skb = NULL; > struct rxe_pkt_info *pkt = NULL; > enum comp_state state; > + int ret = 0; > > rxe_add_ref(qp); > > @@ -554,7 +555,8 @@ int rxe_completer(void *arg) > qp->req.state == QP_STATE_RESET) { > rxe_drain_resp_pkts(qp, qp->valid && > qp->req.state == QP_STATE_ERROR); > - goto exit; > + ret = -EAGAIN; > + goto done; > } > > if (qp->comp.timeout) { > @@ -564,8 +566,10 @@ int rxe_completer(void *arg) > qp->comp.timeout_retry = 0; > } > > - if (qp->req.need_retry) > - goto exit; > + if (qp->req.need_retry) { > + ret = -EAGAIN; > + goto done; > + } > > state = COMPST_GET_ACK; > > @@ -636,8 +640,6 @@ int rxe_completer(void *arg) > break; > > case COMPST_DONE: > - if (pkt) > - free_pkt(pkt); > goto done; > > case COMPST_EXIT: > @@ -660,7 +662,8 @@ int rxe_completer(void *arg) > qp->qp_timeout_jiffies) > mod_timer(&qp->retrans_timer, > jiffies + qp->qp_timeout_jiffies); > - goto exit; > + ret = -EAGAIN; > + goto done; > > case COMPST_ERROR_RETRY: > /* we come here if the retry timer fired and we did > @@ -672,18 +675,18 @@ int rxe_completer(void *arg) > */ > > /* there is nothing to retry in this case */ > - if (!wqe || (wqe->state == wqe_state_posted)) > - goto exit; > + if (!wqe || (wqe->state == wqe_state_posted)) { > + pr_warn("Retry attempted without a valid wqe\n"); > + ret = -EAGAIN; > + goto done; > + } > > /* if we've started a retry, don't start another > * retry sequence, unless this is a timeout. > */ > if (qp->comp.started_retry && > - !qp->comp.timeout_retry) { > - if (pkt) > - free_pkt(pkt); > + !qp->comp.timeout_retry) > goto done; > - } > > if (qp->comp.retry_cnt > 0) { > if (qp->comp.retry_cnt != 7) > @@ -704,8 +707,6 @@ int rxe_completer(void *arg) > qp->comp.started_retry = 1; > rxe_run_task(&qp->req.task, 0); > } > - if (pkt) > - free_pkt(pkt); > goto done; > > } else { > @@ -726,8 +727,8 @@ int rxe_completer(void *arg) > mod_timer(&qp->rnr_nak_timer, > jiffies + rnrnak_jiffies(aeth_syn(pkt) > & ~AETH_TYPE_MASK)); > - free_pkt(pkt); > - goto exit; > + ret = -EAGAIN; > + goto done; > } else { > rxe_counter_inc(rxe, > RXE_CNT_RNR_RETRY_EXCEEDED); > @@ -740,25 +741,15 @@ int rxe_completer(void *arg) > WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); > do_complete(qp, wqe); > rxe_qp_error(qp); > - if (pkt) > - free_pkt(pkt); > - goto exit; > + ret = -EAGAIN; > + goto done; > } > } > > -exit: > - /* we come here if we are done with processing and want the task to > - * exit from the loop calling us > - */ > - WARN_ON_ONCE(skb); > - rxe_drop_ref(qp); > - return -EAGAIN; > - > done: > - /* we come here if we have processed a packet we want the task to call > - * us again to see if there is anything else to do > - */ > - WARN_ON_ONCE(skb); > + if (pkt) > + free_pkt(pkt); > rxe_drop_ref(qp); > - return 0; > + > + return ret; > } > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 0701bd1ffd1a..01662727dca0 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) > return 0; > } > > +/* fix up a send packet to match the packets > + * received from UDP before looping them back > + */ > void rxe_loopback(struct sk_buff *skb) > { > + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); > + > if (skb->protocol == htons(ETH_P_IP)) > skb_pull(skb, sizeof(struct iphdr)); > else > skb_pull(skb, sizeof(struct ipv6hdr)); > > - rxe_rcv(skb); > + if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) > + kfree_skb(skb); > + else > + rxe_rcv(skb); > } > > struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, > diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c > index 45d2f711bce2..7a49e27da23a 100644 > --- a/drivers/infiniband/sw/rxe/rxe_recv.c > +++ b/drivers/infiniband/sw/rxe/rxe_recv.c > @@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > struct rxe_mc_elem *mce; > struct rxe_qp *qp; > union ib_gid dgid; > - struct sk_buff *per_qp_skb; > - struct rxe_pkt_info *per_qp_pkt; > int err; > > if (skb->protocol == htons(ETH_P_IP)) > @@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > /* lookup mcast group corresponding to mgid, takes a ref */ > mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); > if (!mcg) > - goto err1; /* mcast group not registered */ > + goto drop; /* mcast group not registered */ > > spin_lock_bh(&mcg->mcg_lock); > > + /* this is unreliable datagram service so we let > + * failures to deliver a multicast packet to a > + * single QP happen and just move on and try > + * the rest of them on the list > + */ > list_for_each_entry(mce, &mcg->qp_list, qp_list) { > qp = mce->qp; > > @@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) > if (err) > continue; > > - /* for all but the last qp create a new clone of the > - * skb and pass to the qp. If an error occurs in the > - * checks for the last qp in the list we need to > - * free the skb since it hasn't been passed on to > - * rxe_rcv_pkt() which would free it later. > + /* for all but the last QP create a new clone of the > + * skb and pass to the QP. Pass the original skb to > + * the last QP in the list. > */ > if (mce->qp_list.next != &mcg->qp_list) { > - per_qp_skb = skb_clone(skb, GFP_ATOMIC); > - if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { > - kfree_skb(per_qp_skb); > + struct sk_buff *cskb; > + struct rxe_pkt_info *cpkt; > + > + cskb = skb_clone(skb, GFP_ATOMIC); > + if (unlikely(!cskb)) > continue; > + > + if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { > + kfree_skb(cskb); > + break; > } > + > + cpkt = SKB_TO_PKT(cskb); > + cpkt->qp = qp; > + rxe_add_ref(qp); > + rxe_rcv_pkt(cpkt, cskb); > } else { > - per_qp_skb = skb; > - /* show we have consumed the skb */ > - skb = NULL; > + pkt->qp = qp; > + rxe_add_ref(qp); > + rxe_rcv_pkt(pkt, skb); > + skb = NULL; /* mark consumed */ > } > - > - if (unlikely(!per_qp_skb)) > - continue; > - > - per_qp_pkt = SKB_TO_PKT(per_qp_skb); > - per_qp_pkt->qp = qp; > - rxe_add_ref(qp); > - rxe_rcv_pkt(per_qp_pkt, per_qp_skb); > } > > spin_unlock_bh(&mcg->mcg_lock); > > rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ > > -err1: > - /* free skb if not consumed */ > + if (likely(!skb)) > + return; > + > + /* This only occurs if one of the checks fails on the last > + * QP in the list above > + */ > + > +drop: > kfree_skb(skb); > ib_device_put(&rxe->ib_dev); > } > -- > 2.27.0 >