[PATCH] [RFC]: Fine-grained sending rate resolution / division errors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Here is now the patch, I have had a hard time due to division of u64 by
u32. I have also checked again - it is feasible to prune X_calc and
X_recv back to u32. If you prefer that, I can change the patch. 

I'd welcome suggestions about how to improve the division, I had wanted
to at least not throw away the remainder. 

Maybe in future time everything could be scaled by powers of 2, would
make things easier.

Gerrit
-------------------------> Commit Message <---------------------------
[DCCP]: Resolve lower sending rates, avoid integer division error

This patch
 * resolves a bug in CCID 3 where packets smaller than 32/64 bytes
   resulted in sending rates of 0
 * resolves sending rates for all practically applicable cases
 * simplifies the problem that, due to the necessary scaling, overflow
   is likely
 * it implements the strategy from 
   http://www.mail-archive.com/dccp@xxxxxxxxxxxxxxx/msg00938.html

Signed-off-by: Gerrit Renker <gerrit@xxxxxxxxxxxxxx>   
---
 include/linux/tfrc.h   |   11 +++++--
 net/dccp/ccids/ccid3.c |   70 +++++++++++++++++++------------------------------
 net/dccp/ccids/ccid3.h |   20 +++++++++-----
 3 files changed, 49 insertions(+), 52 deletions(-)

--- a/include/linux/tfrc.h
+++ b/include/linux/tfrc.h
@@ -37,11 +37,16 @@ struct tfrc_rx_info {
  * 	@tfrctx_p:	current loss event rate (5.4)
  * 	@tfrctx_rto:	estimate of RTO, equals 4*RTT (4.3)
  * 	@tfrctx_ipi:	inter-packet interval (4.6)
+ *
+ *  NOTE: The sender needs to store fractional sending rates in the range 1/64
+ *  (smallest sending rate: a packet of 1 byte every 64 seconds) up to 4 Gbyte
+ *  per second (IPv6 max jumbogram). Hence we store _all_ sending rates in units
+ *  of 64 * bytes/second - hence the use of __u64.
  */
 struct tfrc_tx_info {
-	__u32 tfrctx_x;
-	__u32 tfrctx_x_recv;
-	__u32 tfrctx_x_calc;
+	__u64 tfrctx_x;
+	__u64 tfrctx_x_recv;
+	__u64 tfrctx_x_calc;
 	__u32 tfrctx_rtt;
 	__u32 tfrctx_p;
 	__u32 tfrctx_rto;
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -41,27 +41,6 @@
 #include "lib/tfrc.h"
 #include "ccid3.h"
 
-/*
- * Reason for maths here is to avoid 32 bit overflow when a is big.
- * With this we get close to the limit.
- */
-static u32 usecs_div(const u32 a, const u32 b)
-{
-	const u32 div = a < (UINT_MAX / (USEC_PER_SEC /    10)) ?    10 :
-			a < (UINT_MAX / (USEC_PER_SEC /    50)) ?    50 :
-			a < (UINT_MAX / (USEC_PER_SEC /   100)) ?   100 :
-			a < (UINT_MAX / (USEC_PER_SEC /   500)) ?   500 :
-			a < (UINT_MAX / (USEC_PER_SEC /  1000)) ?  1000 :
-			a < (UINT_MAX / (USEC_PER_SEC /  5000)) ?  5000 :
-			a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 :
-			a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 :
-								 100000;
-	const u32 tmp = a * (USEC_PER_SEC / div);
-	return (b >= 2 * div) ? tmp / (b / div) : tmp;
-}
-
-
-
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static int ccid3_debug;
 #define ccid3_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid3_debug, format, ##a)
@@ -109,7 +88,8 @@ static inline void ccid3_update_send_tim
 	timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
 
 	/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
-	hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x);
+	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
+					   hctx->ccid3hctx_x >> 6);
 
 	/* Update nominal send time with regard to the new t_ipi */
 	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -130,6 +110,10 @@ static inline void ccid3_update_send_tim
  *
  * If X has changed, we also update the scheduled send time t_now,
  * the inter-packet interval t_ipi, and the delta value.
+ *
+ * NOTE: X, X_calc, and X_recv are all stored in units of 64bytes / second in
+ *       order to enable a minimal resolution of 1byte / 64second. This is the
+ *       smallest possible `s' with the minimal sending rate from RFC 3448.
  */ 
 static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 
@@ -143,16 +127,17 @@ static void ccid3_hc_tx_update_x(struct 
 		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
 						     hctx->ccid3hctx_rtt,
 						     hctx->ccid3hctx_p);
-		hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc,
+		hctx->ccid3hctx_x_calc <<= 6;	/* in units of 64bytes/sec */
+		hctx->ccid3hctx_x = max_t(u64, min(hctx->ccid3hctx_x_calc,
 						   hctx->ccid3hctx_x_recv * 2),
-					       hctx->ccid3hctx_s / TFRC_T_MBI);
+					  (hctx->ccid3hctx_s <<6) / TFRC_T_MBI);
 
 	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >=
 							  hctx->ccid3hctx_rtt) {
-		hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv,
-					    hctx->ccid3hctx_x      ) * 2,
-					usecs_div(hctx->ccid3hctx_s,
-					       	  hctx->ccid3hctx_rtt)   );
+		hctx->ccid3hctx_x = max( 2  *  min(hctx->ccid3hctx_x_recv,
+					    	   hctx->ccid3hctx_x      ),
+					scaled_div(hctx->ccid3hctx_s << 6,
+						   hctx->ccid3hctx_rtt    ));
 		hctx->ccid3hctx_t_ld = *now;
 	} else
 		ccid3_pr_debug("Not changing X\n");
@@ -199,14 +184,14 @@ static void ccid3_hc_tx_no_feedback_time
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 		/* RFC 3448, 4.4: Halve send rate directly */
-		hctx->ccid3hctx_x = min_t(u32, hctx->ccid3hctx_x / 2,
-					       hctx->ccid3hctx_s / TFRC_T_MBI);
+		hctx->ccid3hctx_x = min_t(u64, hctx->ccid3hctx_x / 2,
+				  	      (hctx->ccid3hctx_s << 6) / TFRC_T_MBI);
 
 		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
 			       "bytes/s\n",
 			       dccp_role(sk), sk,
 			       ccid3_tx_state_name(hctx->ccid3hctx_state),
-			       hctx->ccid3hctx_x);
+			       (int) hctx->ccid3hctx_x >> 6);
 		/* The value of R is still undefined and so we can not recompute
 		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
 		t_nfb = TFRC_INITIAL_TIMEOUT;
@@ -219,13 +204,13 @@ static void ccid3_hc_tx_no_feedback_time
 		 */
 		if (!hctx->ccid3hctx_idle ||
 		    (hctx->ccid3hctx_x_recv >=
-		     4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
+		     		4 * scaled_div(hctx->ccid3hctx_s << 6,
+					       hctx->ccid3hctx_rtt    ) ) ) {
 			struct timeval now;
 
 			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
 				       dccp_role(sk), sk,
 				       ccid3_tx_state_name(hctx->ccid3hctx_state));
-			/* Halve sending rate */
 
 			/*  If (X_calc > 2 * X_recv)
 			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
@@ -236,8 +221,9 @@ static void ccid3_hc_tx_no_feedback_time
 			       hctx->ccid3hctx_x_calc == 0);
 
 			if (hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
-				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
-								    hctx->ccid3hctx_s / (2 * TFRC_T_MBI));
+				hctx->ccid3hctx_x_recv =
+					max_t(u64, hctx->ccid3hctx_x_recv / 2,
+					          (hctx->ccid3hctx_s << 6)/(2 * TFRC_T_MBI));
 			else
 				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
 
@@ -317,9 +303,9 @@ static int ccid3_hc_tx_send_packet(struc
 		hctx->ccid3hctx_t_last_win_count = now;
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
 
-		/* Set initial sending rate to 1 packet per second */
+		/* Set initial sending rate to 1pps: X = s * 64 bytes/sec */
 		ccid3_hc_tx_update_s(hctx, skb->len);
-		hctx->ccid3hctx_x     = hctx->ccid3hctx_s;
+		hctx->ccid3hctx_x     = hctx->ccid3hctx_s << 6;
 
 		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
 		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
@@ -443,8 +429,8 @@ static void ccid3_hc_tx_packet_recv(stru
 			return;
 		}
 
-		/* Update receive rate */
-		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+		/* Update receive rate in units of 64 * bytes/second */
+		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate << 6;
 
 		/* Update loss event rate */
 		if (pinv == ~0 || pinv == 0)
@@ -477,7 +463,7 @@ static void ccid3_hc_tx_packet_recv(stru
 			u16 w_init = max(    4 * hctx->ccid3hctx_s,
 					 max(2 * hctx->ccid3hctx_s, 4380));
 			hctx->ccid3hctx_rtt  = r_sample;
-			hctx->ccid3hctx_x    = usecs_div(w_init, r_sample);
+			hctx->ccid3hctx_x    = scaled_div(w_init <<6, r_sample);
 			hctx->ccid3hctx_t_ld = now;
 
 			ccid3_update_send_time(hctx);
@@ -696,7 +682,7 @@ static void ccid3_hc_rx_send_feedback(st
 	case TFRC_RSTATE_DATA: {
 		const u32 delta = timeval_delta(&now,
 					&hcrx->ccid3hcrx_tstamp_last_feedback);
-		hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv,
+		hcrx->ccid3hcrx_x_recv = scaled_div(hcrx->ccid3hcrx_bytes_recv,
 						   delta);
 	}
 		break;
@@ -822,7 +808,7 @@ found:
 
 	dccp_timestamp(sk, &tstamp);
 	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
-	x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
+	x_recv = scaled_div(hcrx->ccid3hcrx_bytes_recv, delta);
 
 	if (x_recv == 0)
 		x_recv = hcrx->ccid3hcrx_x_recv;
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -75,14 +75,14 @@ enum ccid3_hc_tx_states {
 
 /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
  *
- * @ccid3hctx_x - Current sending rate
- * @ccid3hctx_x_recv - Receive rate in bytes per second
- * @ccid3hctx_x_calc - Calculated send rate (RFC 3448, 3.1)
- * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_x - Current sending rate in 64 * bytes per second
+ * @ccid3hctx_x_recv - Receive rate    in 64 * bytes per second
+ * @ccid3hctx_x_calc - Calculated rate in 64 * bytes per second
+ * @ccid3hctx_rtt - Estimate of current round trip time           in usecs
  * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
- * @ccid3hctx_s - Packet size
- * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1)
- * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6)
+ * @ccid3hctx_s - Packet size in bytes
+ * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1)      in usecs
+ * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
  * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
  * @ccid3hctx_last_win_count - Last window counter sent
  * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
@@ -171,4 +171,10 @@ static inline struct ccid3_hc_rx_sock *c
     return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
 }
 
+static inline u64 scaled_div(u64 a, u32 b)
+{
+	a *= 1000000;
+	do_div(a, b);
+	return a;
+}
 #endif /* _DCCP_CCID3_H_ */

-
To unsubscribe from this list: send the line "unsubscribe dccp" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [IETF DCCP]     [Linux Networking]     [Git]     [Security]     [Linux Assembly]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux