[DCCP]: Handle timestamps on Request/Response exchange separately In DCCP, timestamps can occur on packets anytime, CCID3 uses a timestamp(/echo) on the Request/Response exchange. This patch addresses the following situation: * timestamps are recorded on the listening socket; * Responses are sent from dccp_request_sockets; * suppose two connections reach the listening socket with very small time in between: * the first timestamp value gets overwritten by the second connection request. It may seem unlikely that this bug will occur, since the Response is sent out immediately, but to me this does not seem right. I am further not sure whether the socket locks provide sufficient protection against overwriting timestamp values on the listening socket. This patch therefore * makes memory use for timestamps dynamic (to use less when no timestamps are present); * separates `handshake timestamps' from `established timestamps'; * de-allocates in request socket destructor if previous de-allocation has failed. Furthermore, inserting the Timestamp Echo option has been taken out of the block starting with '!dccp_packet_without_ack()', since Timestamp Echo can be carried on any packet (5.8 and 13.3). Lastly, with sampling RTTs in mind, the earliest-unacknowledged timestamp is always kept on the socket (mimicking RFC 1323). This is not fully worked out, to do a RFC1323-style algorithm requires more work, and possibly some changes; but in can in principle benefit from the provided code. Signed-off-by: Gerrit Renker <gerrit@xxxxxxxxxxxxxx> --- include/linux/dccp.h | 25 +++++++++++++++++++++---- net/dccp/dccp.h | 2 +- net/dccp/ipv4.c | 4 ++++ net/dccp/ipv6.c | 4 ++++ net/dccp/minisocks.c | 4 ++++ net/dccp/options.c | 41 +++++++++++++++++++++++++++-------------- net/dccp/proto.c | 5 +++++ 7 files changed, 66 insertions(+), 19 deletions(-) --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -407,11 +407,30 @@ struct dccp_opt_pend { extern void dccp_minisock_init(struct dccp_minisock *dmsk); +/** + * struct dccp_ts_echo - Record incoming timestamp to echo it later + * @ts_time: arrival time of timestamp + * @ts_echo: the timestamp value recorded at @ts_time + */ +struct dccp_ts_echo { + ktime_t ts_time; + __u32 ts_echo; +}; + +/** + * struct dccp_request_sock - represent DCCP-specific connection request + * @dreq_inet_rsk: structure inherited from + * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) + * @dreq_isr: initial sequence number received on the Request + * @dreq_service: service code present on the Request (there is just one) + * @dreq_tstamp: most recent timestamp received during connection setup + */ struct dccp_request_sock { struct inet_request_sock dreq_inet_rsk; __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct dccp_ts_echo *dreq_tstamp; }; static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) @@ -477,8 +496,7 @@ struct dccp_ackvec; * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss * @dccps_service - first (passive sock) or unique (active sock) service code * @dccps_service_list - second .. last service code on passive socket - * @dccps_timestamp_time - time of latest TIMESTAMP option - * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option + * @dccps_tstamp - most recently received timestamp to echo (RFC 4340, 13.1) * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) @@ -514,8 +532,7 @@ struct dccp_sock { __u64 dccps_gar; __be32 dccps_service; struct dccp_service_list *dccps_service_list; - ktime_t dccps_timestamp_time; - __u32 dccps_timestamp_echo; + struct dccp_ts_echo *dccps_tstamp; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; __u16 dccps_pcslen; --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -56,6 +56,7 @@ int dccp_parse_options(struct sock *sk, const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; + struct dccp_ts_echo **tse; unsigned char opt, len; unsigned char *value; u32 elapsed_time, opt_val; @@ -159,12 +160,25 @@ int dccp_parse_options(struct sock *sk, if (len != 4) goto out_invalid_option; + tse = dreq? &dreq->dreq_tstamp : dp->dccps_tstamp; + /* + * Keep the earliest received timestamp on the socket, + * until echoing to the peer frees it. This policy is + * useful for doing RTT measurements (eg. RFC 1323, 3.4) + */ + if (*tse != NULL) + break; + + *tse = kmalloc(sizeof(struct dccp_ts_echo), GFP_ATOMIC); + if (*tse == NULL) { + DCCP_CRIT("can not store received timestamp"); + break; + } + + (*tse)->ts_time = ktime_get_real(); opt_val = get_unaligned((u32 *)value); opt_recv->dccpor_timestamp = ntohl(opt_val); - - /* FIXME: if dreq != NULL, don't store this on listening socket */ - dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dp->dccps_timestamp_time = ktime_get_real(); + (*tse)->ts_echo = opt_recv->dccpor_timestamp; dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", dccp_role(sk), opt_recv->dccpor_timestamp, @@ -384,15 +398,14 @@ int dccp_insert_option_timestamp(struct EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); -static int dccp_insert_option_timestamp_echo(struct sock *sk, +static int dccp_insert_option_timestamp_echo(struct dccp_ts_echo **tse, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); __be32 tstamp_echo; int len, elapsed_time_len; unsigned char *to; const suseconds_t delta = ktime_us_delta(ktime_get_real(), - dp->dccps_timestamp_time); + (*tse)->ts_time); u32 elapsed_time = delta / 10; elapsed_time_len = dccp_elapsed_time_len(elapsed_time); len = 6 + elapsed_time_len; @@ -406,7 +419,7 @@ static int dccp_insert_option_timestamp_ *to++ = DCCPO_TIMESTAMP_ECHO; *to++ = len; - tstamp_echo = htonl(dp->dccps_timestamp_echo); + tstamp_echo = htonl((*tse)->ts_echo); memcpy(to, &tstamp_echo, 4); to += 4; @@ -418,8 +431,8 @@ static int dccp_insert_option_timestamp_ memcpy(to, &var32, 4); } - dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time = ktime_set(0, 0); + kfree(*tse); + *tse = NULL; return 0; } @@ -528,10 +541,6 @@ int dccp_insert_options(struct sock *sk, dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && dccp_insert_option_ackvec(sk, skb)) return -1; - - if (dp->dccps_timestamp_echo != 0 && - dccp_insert_option_timestamp_echo(sk, skb)) - return -1; } if (dp->dccps_hc_rx_insert_options) { @@ -555,6 +564,10 @@ int dccp_insert_options(struct sock *sk, dccp_insert_option_timestamp(sk, skb)) return -1; + if (dp->dccps_tstamp != NULL && + dccp_insert_option_timestamp_echo(&dp->dccps_tstamp, skb)) + return -1; + /* XXX: insert other options when appropriate */ if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -413,7 +413,7 @@ static inline void dccp_update_gss(struc static inline int dccp_ack_pending(const struct sock *sk) { const struct dccp_sock *dp = dccp_sk(sk); - return dp->dccps_timestamp_echo != 0 || + return dp->dccps_tstamp != NULL || #ifdef CONFIG_IP_DCCP_ACKVEC (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -120,6 +120,7 @@ struct sock *dccp_create_openreq_child(s newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; + newdp->dccps_tstamp = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; @@ -303,9 +304,12 @@ EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) { + struct dccp_request_sock *dreq = dccp_rsk(req); + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->acked = 0; req->rcv_wnd = sysctl_dccp_feat_sequence_window; + dreq->dreq_tstamp = NULL; } EXPORT_SYMBOL_GPL(dccp_reqsk_init); --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -254,6 +254,11 @@ int dccp_destroy_sock(struct sock *sk) kfree(dp->dccps_service_list); dp->dccps_service_list = NULL; + if (dp->dccps_tstamp != NULL) { + kfree(dp->dccps_tstamp); + dp->dccps_tstamp = NULL; + } + if (dmsk->dccpms_send_ack_vector) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -551,6 +551,10 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { + struct dccp_request_sock *dreq = dccp_rsk(req); + + if (dreq->dreq_tstamp != NULL) + kfree(dreq->dreq_tstamp); kfree(inet_rsk(req)->opt); } --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -295,6 +295,10 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { + struct dccp_request_sock *dreq = dccp_rsk(req); + + if (dreq->dreq_tstamp != NULL) + kfree(dreq->dreq_tstamp); if (inet6_rsk(req)->pktopts != NULL) kfree_skb(inet6_rsk(req)->pktopts); } - To unsubscribe from this list: send the line "unsubscribe dccp" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html