Re: [PATCH 8/10]: Larger initial windows

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Quoting Eddie Kohler:
|  Gerrit, everyone,
|  
|  Again, the INTENTION I think was for the 2-4 packets per RTT to apply 
|  IMMEDIATELY, because the Request-Response exchange gave you an initial RTT 
|  estimate.  But this is just not what RFC 4342 says.  When I figure this out 
|  with Sally we will get back to you, and perhaps publish an erratum, allowing 
|  Linux to have the correct behavior while remaining 'standards compliant'.

See my previous reply.

|  
|  Eddie
|  
|  
|  Gerrit Renker wrote:
|  > [CCID 3]: Larger initial windows
|  > 
|  > This implements the larger-initial-windows feature for CCID 3, as described
|  > in section 5 of RFC 4342. When the first feedback packet arrives, the sender
|  > can send up to 2..4 packets per RTT, instead of just one. 
|  > 
|  > The patch further
|  >  * reduces the number of timestamping calls by passing the timestamp value
|  >    (which is computed in one of the calling functions anyway) as argument
|  > 
|  >  * renames one constant with a very long name into one which is shorter and 
|  >    resembles the one in RFC 3448 (t_mbi)
|  > 
|  >  * simplifies some of the min_t/max_t cases where both `x', `y' have the same
|  >    type
|  > 
|  > Signed-off-by: Gerrit Renker <gerrit@xxxxxxxxxxxxxx>
|  > ---
|  >  net/dccp/ccids/ccid3.c |   75 +++++++++++++++++++++++++++----------------------
|  >  net/dccp/ccids/ccid3.h |    4 +-
|  >  2 files changed, 44 insertions(+), 35 deletions(-)
|  > 
|  > --- a/net/dccp/ccids/ccid3.c
|  > +++ b/net/dccp/ccids/ccid3.c
|  > @@ -129,35 +129,42 @@ static inline void ccid3_update_send_tim
|  >   * If X has changed, we also update the scheduled send time t_now,
|  >   * the inter-packet interval t_ipi, and the delta value.
|  >   */ 
|  > -static void ccid3_hc_tx_update_x(struct sock *sk)
|  > +static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
|  > +
|  >  {
|  >  	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
|  >  	const __u32 old_x = hctx->ccid3hctx_x;
|  >  
|  >  	DCCP_BUG_ON(hctx->ccid3hctx_rtt == 0);
|  >  
|  > -	/* To avoid large error in calcX */
|  > -	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
|  > -		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
|  > -						     hctx->ccid3hctx_rtt,
|  > -						     hctx->ccid3hctx_p);
|  > -		hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc,
|  > -							  2 * hctx->ccid3hctx_x_recv),
|  > -					       (hctx->ccid3hctx_s /
|  > -					        TFRC_MAX_BACK_OFF_TIME));
|  > -	} else {
|  > -		struct timeval now;
|  > +	/* Compute X after the first feedback packet has arrived */
|  > +	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
|  > +		/* This implements `Larger Initial Windows' [RFC 4342, sec. 5]
|  > +		 * We deviate in that we use `s' instead of `MSS'.           */
|  > +		u16 w_init = max(     4 * hctx->ccid3hctx_s,
|  > +				 max( 2 * hctx->ccid3hctx_s, 4380));
|  > +		hctx->ccid3hctx_x    = usecs_div(w_init, hctx->ccid3hctx_rtt);
|  > +		hctx->ccid3hctx_t_ld = *now;
|  > +
|  > + 	/* To avoid large error in calcX */
|  > +	} else if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
|  > + 		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
|  > + 						     hctx->ccid3hctx_rtt,
|  > + 						     hctx->ccid3hctx_p);
|  > +		hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc,
|  > +						   hctx->ccid3hctx_x_recv * 2),
|  > +					       hctx->ccid3hctx_s / TFRC_t_mbi );
|  > +
|  > +	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >=
|  > +							  hctx->ccid3hctx_rtt) {
|  > +		hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv,
|  > +					    hctx->ccid3hctx_x      ) * 2,
|  > +					usecs_div(hctx->ccid3hctx_s,
|  > +					       	  hctx->ccid3hctx_rtt)   );
|  > +		hctx->ccid3hctx_t_ld = *now;
|  > +	} else
|  > +		ccid3_pr_debug("Not changing X\n");
|  >  
|  > -		dccp_timestamp(sk, &now);
|  > -	       	if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
|  > -		    hctx->ccid3hctx_rtt) {
|  > -			hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
|  > -								  hctx->ccid3hctx_x) * 2,
|  > -						       usecs_div(hctx->ccid3hctx_s,
|  > -							       	 hctx->ccid3hctx_rtt));
|  > -			hctx->ccid3hctx_t_ld = now;
|  > -		}
|  > -	}
|  >  	if(hctx->ccid3hctx_x != old_x)
|  >  		ccid3_update_send_time(hctx);
|  >  }
|  > @@ -185,6 +192,7 @@ static void ccid3_hc_tx_no_feedback_time
|  >  	struct sock *sk = (struct sock *)data;
|  >  	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
|  >  	unsigned long next_tmout = USEC_PER_SEC / 5;
|  > +	struct timeval now;
|  >  
|  >  	bh_lock_sock(sk);
|  >  	if (sock_owned_by_user(sk)) {
|  > @@ -200,10 +208,8 @@ static void ccid3_hc_tx_no_feedback_time
|  >  	case TFRC_SSTATE_NO_FBACK:
|  >  		/* Halve send rate */
|  >  		hctx->ccid3hctx_x /= 2;
|  > -		if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
|  > -					 TFRC_MAX_BACK_OFF_TIME))
|  > -			hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
|  > -					     TFRC_MAX_BACK_OFF_TIME);
|  > +		if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_t_mbi))
|  > +			hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_t_mbi;
|  >  
|  >  		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
|  >  			       "bytes/s\n",
|  > @@ -240,12 +246,13 @@ static void ccid3_hc_tx_no_feedback_time
|  >  			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
|  >  			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
|  >  				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
|  > -								    hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
|  > +								    hctx->ccid3hctx_s / (2 * TFRC_t_mbi));
|  >  			else
|  >  				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
|  >  
|  >  			/* Update sending rate */
|  > -			ccid3_hc_tx_update_x(sk);
|  > +			dccp_timestamp(sk, &now);
|  > +			ccid3_hc_tx_update_x(sk, &now);
|  >  		}
|  >  		/*
|  >  		 * Schedule no feedback timer to expire in
|  > @@ -488,10 +495,9 @@ static void ccid3_hc_tx_packet_recv(stru
|  >  		 *
|  >  		 * q is a constant, RFC 3448 recomments 0.9
|  >  		 */
|  > -		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
|  > -			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
|  > +		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
|  >  			hctx->ccid3hctx_rtt = r_sample;
|  > -		} else
|  > +		else
|  >  			hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
|  >  					      r_sample / 10;
|  >  
|  > @@ -508,7 +514,11 @@ static void ccid3_hc_tx_packet_recv(stru
|  >  		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
|  >  
|  >  		/* Update sending rate (and likely t_ipi, t_nom, and delta) */
|  > -		ccid3_hc_tx_update_x(sk);
|  > +		ccid3_hc_tx_update_x(sk, &now);
|  > +
|  > +		/* Update the state if necessary */
|  > +		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
|  > +			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
|  >  
|  >  		/* remove all packets older than the one acked from history */
|  >  		dccp_tx_hist_purge_older(ccid3_tx_hist,
|  > @@ -519,7 +529,6 @@ static void ccid3_hc_tx_packet_recv(stru
|  >  		 */
|  >  		sk->sk_write_space(sk);
|  >  
|  > -
|  >  		/* Update timeout interval. We use the alternative variant of
|  >  		 * [RFC 3448, 3.1] which sets the upper bound of t_rto to one
|  >  		 * second, as it is suggested for TCP (see RFC 2988, 2.4). */
|  > --- a/net/dccp/ccids/ccid3.h
|  > +++ b/net/dccp/ccids/ccid3.h
|  > @@ -48,8 +48,8 @@
|  >  /* In usecs - half the scheduling granularity as per RFC3448 4.6 */
|  >  #define TFRC_OPSYS_HALF_TIME_GRAN  (USEC_PER_SEC / (2 * HZ))
|  >  
|  > -/* In seconds */
|  > -#define TFRC_MAX_BACK_OFF_TIME	   64
|  > +/* Parameter t_mbi from [RFC 3448, 4.3]: In seconds */
|  > +#define TFRC_t_mbi		   64
|  >  
|  >  #define TFRC_SMALLEST_P		   40
|  >  
|  > -
|  > To unsubscribe from this list: send the line "unsubscribe dccp" in
|  > the body of a message to majordomo@xxxxxxxxxxxxxxx
|  > More majordomo info at  http://vger.kernel.org/majordomo-info.html
|  -
|  To unsubscribe from this list: send the line "unsubscribe dccp" in
|  the body of a message to majordomo@xxxxxxxxxxxxxxx
|  More majordomo info at  http://vger.kernel.org/majordomo-info.html
|  
|  
-
To unsubscribe from this list: send the line "unsubscribe dccp" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [IETF DCCP]     [Linux Networking]     [Git]     [Security]     [Linux Assembly]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux