[DCCP]: Integrate state transitions for passive-close This adds the necessary state transitions for the two forms of passive-close * PASSIVE_CLOSE - which is entered when a host receives a Close; * PASSIVE_CLOSEREQ - which is entered when a client receives a CloseReq. Here is a detailed account of what the patch does in each state. 1) Receiving CloseReq ---------------------- The pseudo-code in 8.5 says: Step 13: Process CloseReq If P.type == CloseReq and S.state < CLOSEREQ, Generate Close S.state := CLOSING Set CLOSING timer. This means we need to address what to do in CLOSED, LISTEN, REQUEST, RESPOND, PARTOPEN, and OPEN. * CLOSED: silently ignore - it may be a late or duplicate CloseReq; * LISTEN/RESPOND: will not appear, since Step 7 is performed first (we know we are the client); * REQUEST: perform Step 13 directly (no need to enqueue packet); * OPEN/PARTOPEN: enter PASSIVE_CLOSEREQ so that the application has a chance to process unread data. When already in PASSIVE_CLOSEREQ, no second CloseReq is enqueued. In any other state, the CloseReq is ignored. I think that this offers some robustness against rare and pathological cases: e.g. a simultaneous close where the client sends a Close and the server a CloseReq. The client will then be retransmitting its Close until it gets the Reset, so ignoring the CloseReq while in state CLOSING is sane. 2) Receiving Close ------------------- The code below from 8.5 is unconditional. Step 14: Process Close If P.type == Close, Generate Reset(Closed) Tear down connection Drop packet and return Thus we need to consider all states: * CLOSED: silently ignore, since this can happen when a retransmitted or late Close arrives; * LISTEN: dccp_rcv_state_process() will generate a Reset ("No Connection"); * REQUEST: perform Step 14 directly (no need to enqueue packet); * RESPOND: dccp_check_req() will generate a Reset ("Packet Error") -- left it at that; * OPEN/PARTOPEN: enter PASSIVE_CLOSE so that application has a chance to process unread data; * CLOSEREQ: server performed active-close -- perform Step 14; * CLOSING: simultaneous-close: use a tie-breaker to avoid message ping-pong (see comment); * PASSIVE_CLOSEREQ: ignore - the peer has a bug (sending first a CloseReq and now a Close); * TIMEWAIT: packet is ignored. Note that the condition of receiving a packet in state CLOSED here is different from the condition "there is no socket for such a connection": the socket still exists, but its state indicates it is unusable. Last, dccp_finish_passive_close sets either DCCP_CLOSED or DCCP_CLOSING = TCP_CLOSING, so that sk_stream_wait_close() will wait for the final Reset (which will trigger CLOSING => CLOSED). Signed-off-by: Gerrit Renker <gerrit@xxxxxxxxxxxxxx> --- include/linux/dccp.h | 1 net/dccp/input.c | 88 ++++++++++++++++++++++++++++++++++++++++++--------- net/dccp/proto.c | 88 +++++++++++++++++++++++++++++++++------------------ 3 files changed, 131 insertions(+), 46 deletions(-) --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -32,16 +32,56 @@ static void dccp_fin(struct sock *sk, st sk->sk_data_ready(sk, 0); } -static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) +static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb) { - dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); - dccp_fin(sk, skb); - dccp_set_state(sk, DCCP_CLOSED); - sk_wake_async(sk, 1, POLL_HUP); + int queued = 0; + + switch (sk->sk_state) { + /* + * We ignore Close when received in one of the following states: + * - CLOSED (may be a late or duplicate packet) + * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier) + * - RESPOND (already handled by dccp_check_req) + */ + case DCCP_CLOSING: + /* + * Simultaneous-close: receiving a Close after sending one. This + * can happen if both client and server perform active-close and + * will result in an endless ping-pong of crossing and retrans- + * mitted Close packets, which only terminates when one of the + * nodes times out (min. 64 seconds). Quicker convergence can be + * achieved when one of the nodes acts as tie-breaker. + * This is ok as both ends are done with data transfer and each + * end is just waiting for the other to acknowledge termination. + */ + if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) + break; + /* fall through */ + case DCCP_REQUESTING: + case DCCP_ACTIVE_CLOSEREQ: + dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_done(sk); + break; + case DCCP_OPEN: + case DCCP_PARTOPEN: + /* Give waiting application a chance to read pending data */ + queued = 1; + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_PASSIVE_CLOSE); + /* fall through */ + case DCCP_PASSIVE_CLOSE: + /* + * Retransmitted Close: we have already enqueued the first one. + */ + sk_wake_async(sk, 1, POLL_HUP); + } + return queued; } -static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) +static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) { + int queued = 0; + /* * Step 7: Check for unexpected packet types * If (S.is_server and P.type == CloseReq) @@ -50,12 +90,26 @@ static void dccp_rcv_closereq(struct soc */ if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); - return; + return queued; } - if (sk->sk_state != DCCP_CLOSING) + /* Step 13: process relevant Client states < CLOSEREQ */ + switch (sk->sk_state) { + case DCCP_REQUESTING: + dccp_send_close(sk, 0); dccp_set_state(sk, DCCP_CLOSING); - dccp_send_close(sk, 0); + break; + case DCCP_OPEN: + case DCCP_PARTOPEN: + /* Give waiting application a chance to read pending data */ + queued = 1; + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ); + /* fall through */ + case DCCP_PASSIVE_CLOSEREQ: + sk_wake_async(sk, 1, POLL_HUP); + } + return queued; } static u8 dccp_reset_code_convert(const u8 code) @@ -231,11 +285,13 @@ static int __dccp_rcv_established(struct dccp_rcv_reset(sk, skb); return 0; case DCCP_PKT_CLOSEREQ: - dccp_rcv_closereq(sk, skb); + if (dccp_rcv_closereq(sk, skb)) + return 0; goto discard; case DCCP_PKT_CLOSE: - dccp_rcv_close(sk, skb); - return 0; + if (dccp_rcv_close(sk, skb)) + return 0; + goto discard; case DCCP_PKT_REQUEST: /* Step 7 * or (S.is_server and P.type == Response) @@ -577,11 +633,13 @@ int dccp_rcv_state_process(struct sock * dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { - dccp_rcv_closereq(sk, skb); + if (dccp_rcv_closereq(sk, skb)) + return 0; goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSE) { - dccp_rcv_close(sk, skb); - return 0; + if (dccp_rcv_close(sk, skb)) + return 0; + goto discard; } switch (sk->sk_state) { --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -71,7 +71,8 @@ void dccp_set_state(struct sock *sk, con break; case DCCP_CLOSED: - if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) + if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ || + oldstate == DCCP_CLOSING) DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); @@ -92,6 +93,24 @@ void dccp_set_state(struct sock *sk, con EXPORT_SYMBOL_GPL(dccp_set_state); +static void dccp_finish_passive_close(struct sock *sk) +{ + switch (sk->sk_state) { + case DCCP_PASSIVE_CLOSE: + /* Node (client or server) has received Close packet. */ + dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_set_state(sk, DCCP_CLOSED); + break; + case DCCP_PASSIVE_CLOSEREQ: + /* + * Client received CloseReq. We set the `active' flag so that + * dccp_send_close() retransmits the Close as per RFC 4340, 8.3. + */ + dccp_send_close(sk, 1); + dccp_set_state(sk, DCCP_CLOSING); + } +} + void dccp_done(struct sock *sk) { dccp_set_state(sk, DCCP_CLOSED); @@ -750,19 +769,26 @@ int dccp_recvmsg(struct kiocb *iocb, str dh = dccp_hdr(skb); - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) + switch (dh->dccph_type) { + case DCCP_PKT_DATA: + case DCCP_PKT_DATAACK: goto found_ok_skb; - if (dh->dccph_type == DCCP_PKT_RESET || - dh->dccph_type == DCCP_PKT_CLOSE) { - dccp_pr_debug("found fin ok!\n"); + case DCCP_PKT_CLOSE: + case DCCP_PKT_CLOSEREQ: + if (!(flags & MSG_PEEK)) + dccp_finish_passive_close(sk); + /* fall through */ + case DCCP_PKT_RESET: + dccp_pr_debug("found fin (%s) ok!\n", + dccp_packet_name(dh->dccph_type)); len = 0; goto found_fin_ok; + default: + dccp_pr_debug("packet_type=%s\n", + dccp_packet_name(dh->dccph_type)); + sk_eat_skb(sk, skb, 0); } - dccp_pr_debug("packet_type=%s\n", - dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb, 0); verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { len = 0; @@ -864,28 +890,30 @@ out: EXPORT_SYMBOL_GPL(inet_dccp_listen); -static const unsigned char dccp_new_state[] = { - /* current state: new state: action: */ - [0] = DCCP_CLOSED, - [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, - [DCCP_REQUESTING] = DCCP_CLOSED, - [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, - [DCCP_LISTEN] = DCCP_CLOSED, - [DCCP_RESPOND] = DCCP_CLOSED, - [DCCP_CLOSING] = DCCP_CLOSED, - [DCCP_TIME_WAIT] = DCCP_CLOSED, - [DCCP_CLOSED] = DCCP_CLOSED, -}; - -static int dccp_close_state(struct sock *sk) +static void dccp_terminate_connection(struct sock *sk) { - const int next = dccp_new_state[sk->sk_state]; - const int ns = next & DCCP_STATE_MASK; + u8 next_state = DCCP_CLOSED; - if (ns != sk->sk_state) - dccp_set_state(sk, ns); + switch (sk->sk_state) { + case DCCP_PASSIVE_CLOSE: + case DCCP_PASSIVE_CLOSEREQ: + dccp_finish_passive_close(sk); + break; + case DCCP_PARTOPEN: + dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + /* fall through */ + case DCCP_OPEN: + dccp_send_close(sk, 1); - return next & DCCP_ACTION_FIN; + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + next_state = DCCP_ACTIVE_CLOSEREQ; + else + next_state = DCCP_CLOSING; + /* fall through */ + default: + dccp_set_state(sk, next_state); + } } void dccp_close(struct sock *sk, long timeout) @@ -922,8 +950,8 @@ void dccp_close(struct sock *sk, long ti if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); - } else if (dccp_close_state(sk)) { - dccp_send_close(sk, 1); + } else if (sk->sk_state != DCCP_CLOSED) { + dccp_terminate_connection(sk); } sk_stream_wait_close(sk, timeout); --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -260,7 +260,6 @@ enum dccp_state { }; #define DCCP_STATE_MASK 0x1f -#define DCCP_ACTION_FIN (1<<7) enum { DCCPF_OPEN = TCPF_ESTABLISHED, - To unsubscribe from this list: send the line "unsubscribe dccp" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html