Re: [PATCH for-next v3] RDMA/rxe: Fix ib_device reference counting (again)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Mar 5, 2021 at 3:23 AM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>
> From: Bob Pearson <rpearsonhpe@xxxxxxxxx>
>
> Three errors occurred in the fix referenced below.
>
> 1) rxe_rcv_mcast_pkt() dropped a reference to ib_device when
> no error occurred causing an underflow on the reference counter.
> This code is cleaned up to be clearer and easier to read.
>
> 2) Extending the reference taken by rxe_get_dev_from_net() in
> rxe_udp_encap_recv() until each skb is freed was not matched by
> a reference in the loopback path resulting in underflows.
>
> 3) In rxe_comp.c in rxe_completer() the function free_pkt() did
> not clear skb which triggered a warning at done: and could possibly
> at exit: . The WARN_ONCE() calls are not actually needed.
> The call to free_pkt() is moved to the end to clearly show that
> all skbs are freed.

Where do these not-freed skb come from? Except these skb, are other
resources freed correctly?

IMHO, the root cause should be found and other resources should be also handled.
WARN_ON_ONCE() should be kept on exit and done.

So if some skb are not freed incorrectly, we can find these skb in the loop end.

Zhu Yanjun

>
> This patch fixes these errors.
>
> Fixes: 899aba891cab ("RDMA/rxe: Fix FIXME in rxe_udp_encap_recv()")
> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
> ---
> Version 3:
> V2 of this patch had spelling errors and style issues which are
> fixed in this version.
>
> Version 2:
> v1 of this patch incorrectly added a WARN_ON_ONCE in rxe_completer
> where it could be triggered for normal traffic. This version
> replaced that with a pr_warn located correctly.
>
> v1 of this patch placed a call to kfree_skb in an if statement
> that could trigger style warnings. This version cleans that up.
>
>  drivers/infiniband/sw/rxe/rxe_comp.c | 55 +++++++++++---------------
>  drivers/infiniband/sw/rxe/rxe_net.c  | 10 ++++-
>  drivers/infiniband/sw/rxe/rxe_recv.c | 59 +++++++++++++++++-----------
>  3 files changed, 67 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
> index a8ac791a1bb9..17a361b8dbb1 100644
> --- a/drivers/infiniband/sw/rxe/rxe_comp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_comp.c
> @@ -547,6 +547,7 @@ int rxe_completer(void *arg)
>         struct sk_buff *skb = NULL;
>         struct rxe_pkt_info *pkt = NULL;
>         enum comp_state state;
> +       int ret = 0;
>
>         rxe_add_ref(qp);
>
> @@ -554,7 +555,8 @@ int rxe_completer(void *arg)
>             qp->req.state == QP_STATE_RESET) {
>                 rxe_drain_resp_pkts(qp, qp->valid &&
>                                     qp->req.state == QP_STATE_ERROR);
> -               goto exit;
> +               ret = -EAGAIN;
> +               goto done;
>         }
>
>         if (qp->comp.timeout) {
> @@ -564,8 +566,10 @@ int rxe_completer(void *arg)
>                 qp->comp.timeout_retry = 0;
>         }
>
> -       if (qp->req.need_retry)
> -               goto exit;
> +       if (qp->req.need_retry) {
> +               ret = -EAGAIN;
> +               goto done;
> +       }
>
>         state = COMPST_GET_ACK;
>
> @@ -636,8 +640,6 @@ int rxe_completer(void *arg)
>                         break;
>
>                 case COMPST_DONE:
> -                       if (pkt)
> -                               free_pkt(pkt);
>                         goto done;
>
>                 case COMPST_EXIT:
> @@ -660,7 +662,8 @@ int rxe_completer(void *arg)
>                             qp->qp_timeout_jiffies)
>                                 mod_timer(&qp->retrans_timer,
>                                           jiffies + qp->qp_timeout_jiffies);
> -                       goto exit;
> +                       ret = -EAGAIN;
> +                       goto done;
>
>                 case COMPST_ERROR_RETRY:
>                         /* we come here if the retry timer fired and we did
> @@ -672,18 +675,18 @@ int rxe_completer(void *arg)
>                          */
>
>                         /* there is nothing to retry in this case */
> -                       if (!wqe || (wqe->state == wqe_state_posted))
> -                               goto exit;
> +                       if (!wqe || (wqe->state == wqe_state_posted)) {
> +                               pr_warn("Retry attempted without a valid wqe\n");
> +                               ret = -EAGAIN;
> +                               goto done;
> +                       }
>
>                         /* if we've started a retry, don't start another
>                          * retry sequence, unless this is a timeout.
>                          */
>                         if (qp->comp.started_retry &&
> -                           !qp->comp.timeout_retry) {
> -                               if (pkt)
> -                                       free_pkt(pkt);
> +                           !qp->comp.timeout_retry)
>                                 goto done;
> -                       }
>
>                         if (qp->comp.retry_cnt > 0) {
>                                 if (qp->comp.retry_cnt != 7)
> @@ -704,8 +707,6 @@ int rxe_completer(void *arg)
>                                         qp->comp.started_retry = 1;
>                                         rxe_run_task(&qp->req.task, 0);
>                                 }
> -                               if (pkt)
> -                                       free_pkt(pkt);
>                                 goto done;
>
>                         } else {
> @@ -726,8 +727,8 @@ int rxe_completer(void *arg)
>                                 mod_timer(&qp->rnr_nak_timer,
>                                           jiffies + rnrnak_jiffies(aeth_syn(pkt)
>                                                 & ~AETH_TYPE_MASK));
> -                               free_pkt(pkt);
> -                               goto exit;
> +                               ret = -EAGAIN;
> +                               goto done;
>                         } else {
>                                 rxe_counter_inc(rxe,
>                                                 RXE_CNT_RNR_RETRY_EXCEEDED);
> @@ -740,25 +741,15 @@ int rxe_completer(void *arg)
>                         WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
>                         do_complete(qp, wqe);
>                         rxe_qp_error(qp);
> -                       if (pkt)
> -                               free_pkt(pkt);
> -                       goto exit;
> +                       ret = -EAGAIN;
> +                       goto done;
>                 }
>         }
>
> -exit:
> -       /* we come here if we are done with processing and want the task to
> -        * exit from the loop calling us
> -        */
> -       WARN_ON_ONCE(skb);
> -       rxe_drop_ref(qp);
> -       return -EAGAIN;
> -
>  done:
> -       /* we come here if we have processed a packet we want the task to call
> -        * us again to see if there is anything else to do
> -        */
> -       WARN_ON_ONCE(skb);
> +       if (pkt)
> +               free_pkt(pkt);
>         rxe_drop_ref(qp);
> -       return 0;
> +
> +       return ret;
>  }
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 0701bd1ffd1a..01662727dca0 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
>         return 0;
>  }
>
> +/* fix up a send packet to match the packets
> + * received from UDP before looping them back
> + */
>  void rxe_loopback(struct sk_buff *skb)
>  {
> +       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
> +
>         if (skb->protocol == htons(ETH_P_IP))
>                 skb_pull(skb, sizeof(struct iphdr));
>         else
>                 skb_pull(skb, sizeof(struct ipv6hdr));
>
> -       rxe_rcv(skb);
> +       if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
> +               kfree_skb(skb);
> +       else
> +               rxe_rcv(skb);
>  }
>
>  struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 45d2f711bce2..7a49e27da23a 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>         struct rxe_mc_elem *mce;
>         struct rxe_qp *qp;
>         union ib_gid dgid;
> -       struct sk_buff *per_qp_skb;
> -       struct rxe_pkt_info *per_qp_pkt;
>         int err;
>
>         if (skb->protocol == htons(ETH_P_IP))
> @@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>         /* lookup mcast group corresponding to mgid, takes a ref */
>         mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
>         if (!mcg)
> -               goto err1;      /* mcast group not registered */
> +               goto drop;      /* mcast group not registered */
>
>         spin_lock_bh(&mcg->mcg_lock);
>
> +       /* this is unreliable datagram service so we let
> +        * failures to deliver a multicast packet to a
> +        * single QP happen and just move on and try
> +        * the rest of them on the list
> +        */
>         list_for_each_entry(mce, &mcg->qp_list, qp_list) {
>                 qp = mce->qp;
>
> @@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>                 if (err)
>                         continue;
>
> -               /* for all but the last qp create a new clone of the
> -                * skb and pass to the qp. If an error occurs in the
> -                * checks for the last qp in the list we need to
> -                * free the skb since it hasn't been passed on to
> -                * rxe_rcv_pkt() which would free it later.
> +               /* for all but the last QP create a new clone of the
> +                * skb and pass to the QP. Pass the original skb to
> +                * the last QP in the list.
>                  */
>                 if (mce->qp_list.next != &mcg->qp_list) {
> -                       per_qp_skb = skb_clone(skb, GFP_ATOMIC);
> -                       if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
> -                               kfree_skb(per_qp_skb);
> +                       struct sk_buff *cskb;
> +                       struct rxe_pkt_info *cpkt;
> +
> +                       cskb = skb_clone(skb, GFP_ATOMIC);
> +                       if (unlikely(!cskb))
>                                 continue;
> +
> +                       if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
> +                               kfree_skb(cskb);
> +                               break;
>                         }
> +
> +                       cpkt = SKB_TO_PKT(cskb);
> +                       cpkt->qp = qp;
> +                       rxe_add_ref(qp);
> +                       rxe_rcv_pkt(cpkt, cskb);
>                 } else {
> -                       per_qp_skb = skb;
> -                       /* show we have consumed the skb */
> -                       skb = NULL;
> +                       pkt->qp = qp;
> +                       rxe_add_ref(qp);
> +                       rxe_rcv_pkt(pkt, skb);
> +                       skb = NULL;     /* mark consumed */
>                 }
> -
> -               if (unlikely(!per_qp_skb))
> -                       continue;
> -
> -               per_qp_pkt = SKB_TO_PKT(per_qp_skb);
> -               per_qp_pkt->qp = qp;
> -               rxe_add_ref(qp);
> -               rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
>         }
>
>         spin_unlock_bh(&mcg->mcg_lock);
>
>         rxe_drop_ref(mcg);      /* drop ref from rxe_pool_get_key. */
>
> -err1:
> -       /* free skb if not consumed */
> +       if (likely(!skb))
> +               return;
> +
> +       /* This only occurs if one of the checks fails on the last
> +        * QP in the list above
> +        */
> +
> +drop:
>         kfree_skb(skb);
>         ib_device_put(&rxe->ib_dev);
>  }
> --
> 2.27.0
>



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux