Re: [PATCH for-next 4/9] RDMA/rxe: Fix delayed send packet handling

On Sat, Jul 22, 2023 at 4:51 AM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>
> In cable pull testing some NICs can hold a send packet long enough
> to allow ulp protocol stacks to destroy the qp and the cleanup
> routines to timeout waiting for all qp references to be released.
> When the NIC driver finally frees the SKB the qp pointer is no longer
> valid and causes a seg fault in rxe_skb_tx_dtor().
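
To make sure I understand the failure: before this patch the
destructor dereferences a raw qp pointer stashed in sk_user_data,
roughly like this (a simplified sketch, not the exact source):

	static void rxe_skb_tx_dtor(struct sk_buff *skb)
	{
		/* sk_user_data was set to the raw qp pointer at QP init */
		struct rxe_qp *qp = skb->sk->sk_user_data;

		/* if the qp has already been destroyed and freed, this
		 * dereferences a dangling pointer
		 */
		atomic_dec_return(&qp->skb_out);
		...
		rxe_put(qp);
	}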

If a NIC holds on to one packet, it will also hold the packets sent
after it, so the QP will stop making progress. That is a serious
problem, and it should be fixed in that kind of NIC. We should not
work around it in RXE.

Also, a QP exists for at least several seconds. Can a packet really
be held in a NIC for that long? Does this problem occur in the real
world, or is it an imagined scenario?

I cannot imagine such a scenario. Jason or Leon, please also check this.

Thanks.
Zhu Yanjun
>
> This patch passes the qp index instead of the qp to the skb destructor
> callback function. The call back is required to lookup the qp from the
> index and if it has been destroyed the lookup will return NULL and the
> qp will not be referenced avoiding the seg fault.
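
So the destructor treats the qp as a weak reference: it stores only
the qp index at send time and re-resolves it at free time, roughly
(a simplified restatement of the hunk below):

	/* recover the qp index stored in sk_user_data by rxe_send() */
	index = (unsigned int)(uintptr_t)skb->sk->sk_user_data;

	/* rxe_pool_get_index() returns NULL if the qp has been
	 * destroyed, so a dangling qp pointer is never dereferenced
	 */
	qp = rxe_pool_get_index(&rxe->qp_pool, index);
	if (!qp)
		return;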
>
> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
> ---
>  drivers/infiniband/sw/rxe/rxe_net.c | 87 ++++++++++++++++++++++-------
>  drivers/infiniband/sw/rxe/rxe_qp.c  |  1 -
>  2 files changed, 67 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index cd59666158b1..10e4a752ff7c 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -131,19 +131,28 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
>         return dst;
>  }
>
> +static struct rxe_dev *get_rxe_from_skb(struct sk_buff *skb)
> +{
> +       struct rxe_dev *rxe;
> +       struct net_device *ndev = skb->dev;
> +
> +       rxe = rxe_get_dev_from_net(ndev);
> +       if (!rxe && is_vlan_dev(ndev))
> +               rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
> +
> +       return rxe;
> +}
> +
>  static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
>  {
>         struct udphdr *udph;
>         struct rxe_dev *rxe;
> -       struct net_device *ndev = skb->dev;
>         struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>
>         /* takes a reference on rxe->ib_dev
>          * drop when skb is freed
>          */
> -       rxe = rxe_get_dev_from_net(ndev);
> -       if (!rxe && is_vlan_dev(ndev))
> -               rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
> +       rxe = get_rxe_from_skb(skb);
>         if (!rxe)
>                 goto drop;
>
> @@ -345,46 +354,84 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
>
>  static void rxe_skb_tx_dtor(struct sk_buff *skb)
>  {
> -       struct sock *sk = skb->sk;
> -       struct rxe_qp *qp = sk->sk_user_data;
> -       int skb_out = atomic_dec_return(&qp->skb_out);
> +       struct rxe_dev *rxe;
> +       unsigned int index;
> +       struct rxe_qp *qp;
> +       int skb_out;
> +
> +       /* takes a ref on ib device if success */
> +       rxe = get_rxe_from_skb(skb);
> +       if (!rxe)
> +               goto out;
> +
> +       /* recover source qp index from sk->sk_user_data
> +        * free the reference taken in rxe_send
> +        */
> +       index = (int)(uintptr_t)skb->sk->sk_user_data;
> +       sock_put(skb->sk);
>
> +       /* lookup qp from index, takes a ref on success */
> +       qp = rxe_pool_get_index(&rxe->qp_pool, index);
> +       if (!qp)
> +               goto out_put_ibdev;
> +
> +       skb_out = atomic_dec_return(&qp->skb_out);
>         if (unlikely(qp->need_req_skb &&
>                      skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
>                 rxe_sched_task(&qp->req.task);
>
>         rxe_put(qp);
> +out_put_ibdev:
> +       ib_device_put(&rxe->ib_dev);
> +out:
> +       return;
>  }
>
>  static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
>  {
> +       struct rxe_qp *qp = pkt->qp;
> +       struct sock *sk;
>         int err;
>
> -       skb->destructor = rxe_skb_tx_dtor;
> -       skb->sk = pkt->qp->sk->sk;
> +       /* qp can be destroyed while this packet is waiting on
> +        * the tx queue. So need to protect sk.
> +        */
> +       sk = qp->sk->sk;
> +       sock_hold(sk);
> +       skb->sk = sk;
>
> -       rxe_get(pkt->qp);
>         atomic_inc(&pkt->qp->skb_out);
>
> +       sk->sk_user_data = (void *)(long)qp->elem.index;
> +       skb->destructor = rxe_skb_tx_dtor;
> +
>         if (skb->protocol == htons(ETH_P_IP)) {
> -               err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
> +               err = ip_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
>         } else if (skb->protocol == htons(ETH_P_IPV6)) {
> -               err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
> +               err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
>         } else {
> -               rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
> -                               skb->protocol);
> -               atomic_dec(&pkt->qp->skb_out);
> -               rxe_put(pkt->qp);
> -               kfree_skb(skb);
> -               return -EINVAL;
> +               rxe_dbg_qp(qp, "Unknown layer 3 protocol: %d",
> +                          skb->protocol);
> +               err = -EINVAL;
> +               goto err_not_sent;
>         }
>
> +       /* IP consumed the packet, the destructor will handle cleanup */
>         if (unlikely(net_xmit_eval(err))) {
> -               rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
> -               return -EAGAIN;
> +               rxe_dbg_qp(qp, "Error sending packet: %d", err);
> +               err = -EAGAIN;
> +               goto err_out;
>         }
>
>         return 0;
> +
> +err_not_sent:
> +       skb->destructor = NULL;
> +       atomic_dec(&pkt->qp->skb_out);
> +       kfree_skb(skb);
> +       sock_put(sk);
> +err_out:
> +       return err;
>  }
>
>  /* fix up a send packet to match the packets
> diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
> index a569b111a9d2..dcbf71031453 100644
> --- a/drivers/infiniband/sw/rxe/rxe_qp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_qp.c
> @@ -194,7 +194,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
>         err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
>         if (err < 0)
>                 return err;
> -       qp->sk->sk->sk_user_data = qp;
>
>         /* pick a source UDP port number for this QP based on
>          * the source QPN. this spreads traffic for different QPs
> --
> 2.39.2
>