This implements the algorithm to update the allowed sending rate X upon
receiving feedback packets, as described in draft rfc3448bis, 4.2/4.3.

The patch further removes two irrelevant states in TX feedback handling:
 * the NO_SENT state is only triggered in bidirectional mode, costing
   unnecessary processing.
 * the TERM (terminating) state is irrelevant.

Signed-off-by: Gerrit Renker <gerrit@xxxxxxxxxxxxxx>
Signed-off-by: Ian McDonald <ian.mcdonald@xxxxxxxxxxx>
---
 net/dccp/ccids/ccid3.c | 171 ++++++++++++++++++++++++------------------------
 1 files changed, 86 insertions(+), 85 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 18ace1c..c631c3b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -405,110 +405,111 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) return; + /* ... and only in the established state */ + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_FBACK: /* fall through */ + case TFRC_SSTATE_FBACK: break; + default: return; + } + + /* estimate RTT from history if ACK number is valid */ + if (! tfrc_tx_hist_when(&t_send, &hctx->ccid3hctx_hist, + DCCP_SKB_CB(skb)->dccpd_ack_seq)) { + DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), + (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); + return; + } + opt_recv = &hctx->ccid3hctx_options_received; - switch (hctx->ccid3hctx_state) { - case TFRC_SSTATE_NO_FBACK: - case TFRC_SSTATE_FBACK: - /* estimate RTT from history if ACK number is valid */ - if (! 
tfrc_tx_hist_when(&t_send, &hctx->ccid3hctx_hist, - DCCP_SKB_CB(skb)->dccpd_ack_seq)) { - DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), - (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); - return; - } + /* Update receive rate in units of 64 * bytes/second */ + hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; + hctx->ccid3hctx_x_recv <<= 6; - /* Update receive rate in units of 64 * bytes/second */ - hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; - hctx->ccid3hctx_x_recv <<= 6; + /* Update loss event rate (which is scaled by 1e6) */ + pinv = opt_recv->ccid3or_loss_event_rate; + if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ + hctx->ccid3hctx_p = 0; + else /* can not exceed 100% */ + hctx->ccid3hctx_p = scaled_div(1, pinv); - /* Update loss event rate */ - pinv = opt_recv->ccid3or_loss_event_rate; - if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ - hctx->ccid3hctx_p = 0; - else /* can not exceed 100% */ - hctx->ccid3hctx_p = 1000000 / pinv; + /* + * Calculate new RTT sample and update moving average + */ + now = ktime_get_real(); + r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, t_send)); + hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9); - now = ktime_get_real(); - /* - * Calculate new RTT sample and update moving average - */ - r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, t_send)); - hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9); + /* + * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 + */ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + if (hctx->ccid3hctx_t_rto == 0) { /* - * Larger Initial Windows [RFC 4342, sec. 
5] + * Initial feedback packet: Larger Initial Windows (4.2) */ hctx->ccid3hctx_x = rfc3390_initial_rate(sk); hctx->ccid3hctx_t_ld = now; ccid3_update_send_interval(hctx); - ccid3_pr_debug("%s(%p), s=%u, MSS=%u, " - "R_sample=%uus, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_s, - dccp_sk(sk)->dccps_mss_cache, r_sample, - (unsigned)(hctx->ccid3hctx_x >> 6)); + goto done_computing_x; - ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - } else { - - /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ - if (hctx->ccid3hctx_p > 0) - hctx->ccid3hctx_x_calc = - tfrc_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); - ccid3_hc_tx_update_x(sk, &now); - - ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " - "p=%u, X_calc=%u, X_recv=%u, X=%u\n", - dccp_role(sk), - sk, hctx->ccid3hctx_rtt, r_sample, - hctx->ccid3hctx_s, hctx->ccid3hctx_p, - hctx->ccid3hctx_x_calc, - (unsigned)(hctx->ccid3hctx_x_recv >> 6), - (unsigned)(hctx->ccid3hctx_x >> 6)); + } else if (hctx->ccid3hctx_p == 0) { + /* + * First feedback after nofeedback timer expiry (4.3) + */ + goto done_computing_x; } + } + /* perform step (4) of draft rfc3448bis, section 4.3 */ + if (hctx->ccid3hctx_p > 0) + hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); + ccid3_hc_tx_update_x(sk, &now); + +done_computing_x: + ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, p=%u, X_calc=%u, " + "X_recv=%u, X=%u\n", dccp_role(sk), sk, + hctx->ccid3hctx_rtt, r_sample, hctx->ccid3hctx_s, + hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, + (unsigned)(hctx->ccid3hctx_x_recv >> 6), + (unsigned)(hctx->ccid3hctx_x >> 6)); + + /* unschedule no feedback timer */ + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); - /* unschedule no feedback timer */ - sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); - - /* - * As we have calculated new ipi, delta, t_nom it is possible - * that we now can send a packet, so wake up dccp_wait_for_ccid - */ - 
sk->sk_write_space(sk); + /* + * As we have calculated new ipi, delta, t_nom it is possible + * that we now can send a packet, so wake up dccp_wait_for_ccid + */ + sk->sk_write_space(sk); - /* - * Update timeout interval for the nofeedback timer. - * We use a configuration option to increase the lower bound. - * This can help avoid triggering the nofeedback timer too - * often ('spinning') on LANs with small RTTs. - */ - hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, - CONFIG_IP_DCCP_CCID3_RTO * - (USEC_PER_SEC/1000)); - /* - * Schedule no feedback timer to expire in - * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) - */ - t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); + /* + * Update timeout interval for the nofeedback timer. + * We use a configuration option to increase the lower bound. + * This can help avoid triggering the nofeedback timer too + * often ('spinning') on LANs with small RTTs. + */ + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + CONFIG_IP_DCCP_CCID3_RTO * + (USEC_PER_SEC/1000)); + /* + * Schedule no feedback timer to expire in + * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) + */ + t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); - ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " - "expire in %lu jiffies (%luus)\n", - dccp_role(sk), - sk, usecs_to_jiffies(t_nfb), t_nfb); + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(t_nfb)); - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + usecs_to_jiffies(t_nfb)); - break; - case TFRC_SSTATE_NO_SENT: /* fall through */ - case TFRC_SSTATE_TERM: /* ignore feedback when closing */ - break; - } + ccid3_pr_debug("Scheduled no feedback timer to expire in %lu jiffies" + "(%luus)\n", usecs_to_jiffies(t_nfb), t_nfb); } static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, -- 1.5.2.2.238.g7cbf2f2-dirty - To unsubscribe from this list: send the line "unsubscribe dccp" in the body 
of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html