[Patch 3/3]: dccp: resolve putative recursive lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This fixes a problem which was first pointed out on
  http://www.mail-archive.com/dccp@xxxxxxxxxxxxxxx/msg00354.html

The problem occurs when a listening DCCPv4/6 socket has sent a Response in
reply to a Request; it manifests at the time the ACK is received.

The problem occurs because sk_receive_skb locks sk and then calls the AF-specific backlog
function (dccp_v{4,6}_do_rcv). This ends up calling dccp_v{4,6}_request_recv_sock,
which calls dccp_create_openreq_child, and from there reaches sk_clone via inet_csk_clone.
In sk_clone, a lock is placed on newsk; since newsk belongs to the same lock class as sk,
the lock validator reports this as the same lock being acquired twice (a false recursion).

Call graph:
**********
    dccp_v4_rcv(skb)
        |
        sk_receive_skb(sk, skb)
          |
          bh_lock_sock(sk)
          rc = sk->backlog_rcv(sk, skb)
                    |
                    dccp_v4_do_rcv(sk, skb) /* state == LISTEN */
                      |
                    dccp_v4_hnd_req(sk, skb)
                       |
                       dccp_check_req(sk, skb, req, prev)
                        |
                        child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
                              |        /* syn_recv_sock translates to the following: */
                        child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
                                |     /* contains the following code: */
                                newsk = dccp_create_openreq_child(sk, req, skb)
                                      |      /* calls: */
                                newsk = inet_csk_clone(sk, req, GFP_ATOMIC)
                                      |      /* in turn calls: */
                                newsk = sk_clone(sk, priority)      /* newsk != NULL */
                                         |   /* has this code segment */
                                         sk_node_init(&newsk->sk_node)
                                         sock_lock_init(newsk)
                                         bh_lock_sock(newsk)        /* <=== putative recursive lock */
          /* ..... returns here ..... */
          bh_unlock_sock(sk)
     /* return of dccp_v4_rcv(): sk is unlocked, whereas newsk is still locked  */

This patch fixes the problem by adapting the corresponding receive-path code from
net/ipv?/tcp_ipv?.c to DCCP.

Effectively, the code of the short sk_receive_skb function is unfolded in-place,
and bh_lock_sock_nested() is used instead of bh_lock_sock().


Signed-off-by: Gerrit Renker <gerrit@xxxxxxxxxxxxxx>
---
 ipv4.c |   22 +++++++++++++++++++---
 ipv6.c |   27 ++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f87b0c0..f86ebcc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -897,6 +897,7 @@ static int dccp_v4_rcv(struct sk_buff *s
 {
 	const struct dccp_hdr *dh;
 	struct sock *sk;
+	int ret = 0;
 
 	/* Step 1: Check header basics: */
 
@@ -970,7 +971,23 @@ static int dccp_v4_rcv(struct sk_buff *s
 		goto discard_and_relse;
 	nf_reset(skb);
 
-	return sk_receive_skb(sk, skb);
+	if (sk_filter(sk, skb))
+		goto discard_and_relse;
+
+	bh_lock_sock_nested(sk);    /* sk might be cloned later */
+	if (!sock_owned_by_user(sk)) {
+		/*
+		 * lock semantics stolen from sk_receive_skb
+		 */
+		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+		ret = dccp_v4_do_rcv(sk, skb);
+		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+	} else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
+
+	sock_put(sk);
+	return ret;
 
 no_dccp_socket:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -987,9 +1004,8 @@ no_dccp_socket:
 	}
 
 discard_it:
-	/* Discard frame. */
 	kfree_skb(skb);
-	return 0;
+	return ret;
 
 discard_and_relse:
 	sock_put(sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 91e7b12..49690ee 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1035,6 +1035,7 @@ static int dccp_v6_rcv(struct sk_buff **
 	const struct dccp_hdr *dh;
 	struct sk_buff *skb = *pskb;
 	struct sock *sk;
+	int ret = 0;
 
 	/* Step 1: Check header basics: */
 
@@ -1078,7 +1079,23 @@ static int dccp_v6_rcv(struct sk_buff **
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	return sk_receive_skb(sk, skb) ? -1 : 0;
+	if (sk_filter(sk, skb))
+		goto discard_and_relse;
+
+	bh_lock_sock_nested(sk);    /* sk might be cloned later */
+	if (!sock_owned_by_user(sk)) {
+		/*
+		 * lock semantics stolen from sk_receive_skb
+		 */
+		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+		ret = dccp_v6_do_rcv(sk, skb);
+		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+	} else
+		sk_add_backlog(sk, skb);
+	bh_unlock_sock(sk);
+
+	sock_put(sk);
+	return ret? -1 : 0;
 
 no_dccp_socket:
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -1093,14 +1110,10 @@ no_dccp_socket:
 					DCCP_RESET_CODE_NO_CONNECTION;
 		dccp_v6_ctl_send_reset(skb);
 	}
-discard_it:
-
-	/*
-	 *	Discard frame
-	 */
 
+discard_it:
 	kfree_skb(skb);
-	return 0;
+	return ret;
 
 discard_and_relse:
 	sock_put(sk);
-
To unsubscribe from this list: send the line "unsubscribe dccp" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [IETF DCCP]     [Linux Networking]     [Git]     [Security]     [Linux Assembly]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux