[PATCH for-next 4/9] RDMA/rxe: Fix delayed send packet handling

In cable pull testing, some NICs can hold a send packet long enough
for the ULP protocol stack to destroy the qp and for the cleanup
routines to time out while waiting for all qp references to be
released. When the NIC driver finally frees the SKB, the qp pointer
is no longer valid and dereferencing it causes a segfault in
rxe_skb_tx_dtor().

This patch passes the qp index instead of the qp pointer to the skb
destructor callback. The callback is required to look up the qp from
the index; if the qp has already been destroyed the lookup returns
NULL and the qp is never referenced, avoiding the segfault.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
---
 drivers/infiniband/sw/rxe/rxe_net.c | 87 ++++++++++++++++++++++-------
 drivers/infiniband/sw/rxe/rxe_qp.c  |  1 -
 2 files changed, 67 insertions(+), 21 deletions(-)
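
For reviewers, a minimal user-space sketch of the pattern used below
(the names obj_table, obj_lookup and tx_dtor are illustrative only,
not rxe code): the destructor is handed a small integer index packed
into a pointer-sized field instead of a raw pointer, and validates it
against a table at free time, so an already-destroyed object is simply
not found rather than dereferenced.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TABLE_SIZE 8

struct obj {
	unsigned int index;
	int refcount;
};

/* plays the role of rxe->qp_pool: look up by index, take a ref on success */
static struct obj *obj_table[TABLE_SIZE];

static struct obj *obj_lookup(unsigned int index)
{
	if (index >= TABLE_SIZE || !obj_table[index])
		return NULL;
	obj_table[index]->refcount++;
	return obj_table[index];
}

/* the destructor only ever sees the index, packed into a void *,
 * mirroring sk->sk_user_data = (void *)(long)qp->elem.index below
 */
static void tx_dtor(void *user_data)
{
	unsigned int index = (unsigned int)(uintptr_t)user_data;
	struct obj *o = obj_lookup(index);

	if (!o) {
		printf("object %u already destroyed, nothing to do\n", index);
		return;		/* no stale pointer is dereferenced */
	}

	printf("object %u still live, refcount %d\n", index, o->refcount);
	o->refcount--;		/* drop the reference taken by the lookup */
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));
	void *user_data;

	if (!o)
		return 1;

	o->index = 3;
	obj_table[o->index] = o;

	/* stash the index, not the pointer */
	user_data = (void *)(uintptr_t)o->index;

	tx_dtor(user_data);		/* object alive: lookup finds it */

	obj_table[o->index] = NULL;	/* simulate destroying the object */
	free(o);

	tx_dtor(user_data);		/* object gone: lookup returns NULL */

	return 0;
}

In the driver the corresponding lookup, rxe_pool_get_index(), also
takes a reference on the qp when it succeeds, so the qp cannot go away
while rxe_skb_tx_dtor() is still using it.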

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index cd59666158b1..10e4a752ff7c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -131,19 +131,28 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
 	return dst;
 }
 
+static struct rxe_dev *get_rxe_from_skb(struct sk_buff *skb)
+{
+	struct rxe_dev *rxe;
+	struct net_device *ndev = skb->dev;
+
+	rxe = rxe_get_dev_from_net(ndev);
+	if (!rxe && is_vlan_dev(ndev))
+		rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
+
+	return rxe;
+}
+
 static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct udphdr *udph;
 	struct rxe_dev *rxe;
-	struct net_device *ndev = skb->dev;
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 
 	/* takes a reference on rxe->ib_dev
 	 * drop when skb is freed
 	 */
-	rxe = rxe_get_dev_from_net(ndev);
-	if (!rxe && is_vlan_dev(ndev))
-		rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
+	rxe = get_rxe_from_skb(skb);
 	if (!rxe)
 		goto drop;
 
@@ -345,46 +354,84 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
 
 static void rxe_skb_tx_dtor(struct sk_buff *skb)
 {
-	struct sock *sk = skb->sk;
-	struct rxe_qp *qp = sk->sk_user_data;
-	int skb_out = atomic_dec_return(&qp->skb_out);
+	struct rxe_dev *rxe;
+	unsigned int index;
+	struct rxe_qp *qp;
+	int skb_out;
+
+	/* takes a ref on the ib device if successful */
+	rxe = get_rxe_from_skb(skb);
+	if (!rxe)
+		goto out;
+
+	/* recover source qp index from sk->sk_user_data
+	 * free the reference taken in rxe_send
+	 */
+	index = (int)(uintptr_t)skb->sk->sk_user_data;
+	sock_put(skb->sk);
 
+	/* lookup qp from index, takes a ref on success */
+	qp = rxe_pool_get_index(&rxe->qp_pool, index);
+	if (!qp)
+		goto out_put_ibdev;
+
+	skb_out = atomic_dec_return(&qp->skb_out);
 	if (unlikely(qp->need_req_skb &&
 		     skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
 		rxe_sched_task(&qp->req.task);
 
 	rxe_put(qp);
+out_put_ibdev:
+	ib_device_put(&rxe->ib_dev);
+out:
+	return;
 }
 
 static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
+	struct rxe_qp *qp = pkt->qp;
+	struct sock *sk;
 	int err;
 
-	skb->destructor = rxe_skb_tx_dtor;
-	skb->sk = pkt->qp->sk->sk;
+	/* qp can be destroyed while this packet is waiting on
+	 * the tx queue. So need to protect sk.
+	 */
+	sk = qp->sk->sk;
+	sock_hold(sk);
+	skb->sk = sk;
 
-	rxe_get(pkt->qp);
 	atomic_inc(&pkt->qp->skb_out);
 
+	sk->sk_user_data = (void *)(long)qp->elem.index;
+	skb->destructor = rxe_skb_tx_dtor;
+
 	if (skb->protocol == htons(ETH_P_IP)) {
-		err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+		err = ip_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
-		err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+		err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
 	} else {
-		rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
-				skb->protocol);
-		atomic_dec(&pkt->qp->skb_out);
-		rxe_put(pkt->qp);
-		kfree_skb(skb);
-		return -EINVAL;
+		rxe_dbg_qp(qp, "Unknown layer 3 protocol: %d\n",
+			   skb->protocol);
+		err = -EINVAL;
+		goto err_not_sent;
 	}
 
+	/* IP consumed the packet, the destructor will handle cleanup */
 	if (unlikely(net_xmit_eval(err))) {
-		rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
-		return -EAGAIN;
+		rxe_dbg_qp(qp, "Error sending packet: %d\n", err);
+		err = -EAGAIN;
+		goto err_out;
 	}
 
 	return 0;
+
+err_not_sent:
+	skb->destructor = NULL;
+	atomic_dec(&pkt->qp->skb_out);
+	kfree_skb(skb);
+	sock_put(sk);
+err_out:
+	return err;
 }
 
 /* fix up a send packet to match the packets
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a569b111a9d2..dcbf71031453 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -194,7 +194,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 	err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
 	if (err < 0)
 		return err;
-	qp->sk->sk->sk_user_data = qp;
 
 	/* pick a source UDP port number for this QP based on
 	 * the source QPN. this spreads traffic for different QPs
-- 
2.39.2



