Re: [PATCH for-next v2] RDMA/rxe: Fix ib_device reference counting (again)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 4, 2021 at 7:02 AM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>
> Three errors occurred in the fix referenced below.
>
> 1) rxe_rcv_mcast_pkt() dropped a reference to ib_device when
> no error occurred, causing an underflow on the reference counter.
> This code is cleaned up to be clearer and easier to read.
>
> 2) Extending the reference taken by rxe_get_dev_from_net() in
> rxe_udp_encap_recv() until each skb is freed was not matched by
> a reference in the loopback path resulting in underflows.
>
> 3) In rxe_comp.c the function free_pkt() did not clear skb, which
> triggered a warning at done: and could possibly trigger one at exit:
> in rxe_completer(). The WARN_ON_ONCE() calls are not actually needed.
>
> This patch fixes these errors.
>
> Fixes: 899aba891cab ("RDMA/rxe: Fix FIXME in rxe_udp_encap_recv()")
> Signed-off-by: Bob Pearson <rpearson@xxxxxxx>
> ---
> Version 2:
> v1 of this patch incorrectly added a WARN_ON_ONCE in rxe_completer
> where it could be triggered for normal traffic. This version
> replaced that with a pr_warn located correctly.
>
> v1 of this patch placed a call to kfree_skb in an if statement
> that could trigger style warnings. This version cleans that up.
>
>  drivers/infiniband/sw/rxe/rxe_comp.c |  6 +--
>  drivers/infiniband/sw/rxe/rxe_net.c  | 10 ++++-
>  drivers/infiniband/sw/rxe/rxe_recv.c | 60 +++++++++++++++++-----------
>  3 files changed, 48 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
> index a8ac791a1bb9..96e5a73579f8 100644
> --- a/drivers/infiniband/sw/rxe/rxe_comp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_comp.c
> @@ -672,8 +672,10 @@ int rxe_completer(void *arg)
>                          */
>
>                         /* there is nothing to retry in this case */
> -                       if (!wqe || (wqe->state == wqe_state_posted))
> +                       if (!wqe || (wqe->state == wqe_state_posted)) {
> +                               pr_warn("Retry attempted without a valid wqe\n");
>                                 goto exit;
> +                       }
>
>                         /* if we've started a retry, don't start another
>                          * retry sequence, unless this is a timeout.
> @@ -750,7 +752,6 @@ int rxe_completer(void *arg)
>         /* we come here if we are done with processing and want the task to
>          * exit from the loop calling us
>          */
> -       WARN_ON_ONCE(skb);

IMHO, the above "WARN_ON_ONCE(skb);" should be kept.

>         rxe_drop_ref(qp);
>         return -EAGAIN;
>
> @@ -758,7 +759,6 @@ int rxe_completer(void *arg)
>         /* we come here if we have processed a packet we want the task to call
>          * us again to see if there is anything else to do
>          */
> -       WARN_ON_ONCE(skb);

The above "WARN_ON_ONCE(skb);" should also be kept.

Zhu Yanjun

>         rxe_drop_ref(qp);
>         return 0;
>  }
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 0701bd1ffd1a..01662727dca0 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
>         return 0;
>  }
>
> +/* fix up a send packet to match the packets
> + * received from UDP before looping them back
> + */
>  void rxe_loopback(struct sk_buff *skb)
>  {
> +       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
> +
>         if (skb->protocol == htons(ETH_P_IP))
>                 skb_pull(skb, sizeof(struct iphdr));
>         else
>                 skb_pull(skb, sizeof(struct ipv6hdr));
>
> -       rxe_rcv(skb);
> +       if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
> +               kfree_skb(skb);
> +       else
> +               rxe_rcv(skb);
>  }
>
>  struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 45d2f711bce2..2b2465744896 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>         struct rxe_mc_elem *mce;
>         struct rxe_qp *qp;
>         union ib_gid dgid;
> -       struct sk_buff *per_qp_skb;
> -       struct rxe_pkt_info *per_qp_pkt;
>         int err;
>
>         if (skb->protocol == htons(ETH_P_IP))
> @@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>         /* lookup mcast group corresponding to mgid, takes a ref */
>         mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
>         if (!mcg)
> -               goto err1;      /* mcast group not registered */
> +               goto drop;      /* mcast group not registered */
>
>         spin_lock_bh(&mcg->mcg_lock);
>
> +       /* this is unreliable datagram service so we let
> +        * failures to deliver a multicast packet to a
> +        * single QP happen and just move on and try
> +        * the rest of them on the list
> +        */
>         list_for_each_entry(mce, &mcg->qp_list, qp_list) {
>                 qp = mce->qp;
>
> @@ -266,39 +269,48 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>                 if (err)
>                         continue;
>
> -               /* for all but the last qp create a new clone of the
> -                * skb and pass to the qp. If an error occurs in the
> -                * checks for the last qp in the list we need to
> -                * free the skb since it hasn't been passed on to
> -                * rxe_rcv_pkt() which would free it later.
> +               /* for all but the last QP create a new clone of the
> +                * skb and pass to the QP. Pass the original skb to
> +                * the last QP in the list.
>                  */
>                 if (mce->qp_list.next != &mcg->qp_list) {
> -                       per_qp_skb = skb_clone(skb, GFP_ATOMIC);
> -                       if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
> -                               kfree_skb(per_qp_skb);
> +                       struct sk_buff *cskb;
> +                       struct rxe_pkt_info *cpkt;
> +
> +                       cskb = skb_clone(skb, GFP_ATOMIC);
> +                       if (unlikely(!cskb))
>                                 continue;
> +
> +                       if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
> +                               kfree_skb(cskb);
> +                               break;
>                         }
> +
> +                       cpkt = SKB_TO_PKT(cskb);
> +                       cpkt->qp = qp;
> +                       rxe_add_ref(qp);
> +                       rxe_rcv_pkt(cpkt, cskb);
>                 } else {
> -                       per_qp_skb = skb;
> -                       /* show we have consumed the skb */
> -                       skb = NULL;
> +                       pkt->qp = qp;
> +                       rxe_add_ref(qp);
> +                       rxe_rcv_pkt(pkt, skb);
> +                       skb = NULL;     /* mark consumed */
>                 }
> -
> -               if (unlikely(!per_qp_skb))
> -                       continue;
> -
> -               per_qp_pkt = SKB_TO_PKT(per_qp_skb);
> -               per_qp_pkt->qp = qp;
> -               rxe_add_ref(qp);
> -               rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
>         }
>
>         spin_unlock_bh(&mcg->mcg_lock);
>
>         rxe_drop_ref(mcg);      /* drop ref from rxe_pool_get_key. */
>
> -err1:
> -       /* free skb if not consumed */
> +       if (likely(!skb))
> +               return;
> +
> +       /* Fall through to drop packet
> +        * This only occurs if one of the checks fails on the last
> +        * QP in the list above
> +        */
> +
> +drop:
>         kfree_skb(skb);
>         ib_device_put(&rxe->ib_dev);
>  }
> --
> 2.27.0
>



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux