Re: [ANNOUNCE] TCP Westwood+

On Thu, 29 Jan 2004 11:56:07 -0800
"David S. Miller" <davem@redhat.com> wrote:

>On Fri, 16 Jan 2004 19:21:44 +0100
>"Angelo Dell'Aera" <buffer@antifork.org> wrote:
>
>> I modified my patch in such a way as to have TCP Westwood+ always
>> available as you said. Currently I just left in the compile options 
>> for collecting statistics and for debugging (currently not used). 
>
>Please remove all of the config options and ifdefs, please do this
>so I can seriously consider putting this into the tree.

Here is the patch with the changes you asked for. As you can see, the
options are completely gone, since I think they're fairly useless unless
you're running a performance evaluation test. Anyone who still wants to
do that can simply use the patches available on my homepage. This patch
applies against kernel 2.4.24.
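
A quick note on usage: with this patch Westwood+ is always compiled in
but disabled by default (sysctl_tcp_westwood is 0), so you can turn it
on and off at runtime by writing 1 or 0 to
/proc/sys/net/ipv4/tcp_westwood.

To give an idea of what the code does, here is a small user-space
sketch of the estimator core (this is NOT part of the patch; the names
and the sample numbers are invented for illustration). Once per
RTT-long window the acked bytes are divided by the window length to
get a raw bandwidth sample (bytes per jiffy, as in westwood_filter());
the sample is smoothed twice with the same constant-coefficient
low-pass filter as westwood_do_filter(), and after a congestion event
ssthresh becomes BWE * RTTmin / MSS in packets, with the same lower
bound of 2 packets that westwood_bw_rttmin() applies:

#include <stdio.h>

typedef unsigned int u32;

/* Same filter as westwood_do_filter(): new = (7*old + sample) / 8 */
static u32 lowpass(u32 old, u32 sample)
{
	return (7*old + sample) >> 3;
}

int main(void)
{
	u32 bw_ns_est = 0, bw_est = 0;
	u32 rtt_min = 5;	/* jiffies: 50ms at HZ=100 */
	u32 mss = 1460;		/* bytes */
	u32 delta = 10;		/* window length in jiffies */
	/* bytes acked in four consecutive RTT windows (made up) */
	u32 bk[4] = { 146000, 292000, 292000, 438000 };
	u32 ssthresh;
	int i;

	for (i = 0; i < 4; i++) {
		u32 sample = bk[i] / delta;		/* bytes/jiffy */
		bw_ns_est = lowpass(bw_ns_est, sample);	/* first stage */
		bw_est = lowpass(bw_est, bw_ns_est);	/* second stage */
	}

	/* ssthresh in packets, as __westwood_bw_rttmin() computes it */
	ssthresh = bw_est * rtt_min / mss;
	if (ssthresh < 2)	/* same clamp as westwood_bw_rttmin() */
		ssthresh = 2;

	printf("bw_est = %u bytes/jiffy, ssthresh = %u packets\n",
	       bw_est, ssthresh);
	return 0;
}

With those invented numbers the estimate settles at bw_est = 3141
bytes/jiffy and ssthresh = 10 packets. The first stage (bw_ns_est)
tracks the raw samples, while the second stage gives the smoother
bw_est that the patch actually uses for setting ssthresh.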

Regards.

--

Angelo Dell'Aera 'buffer' 
Antifork Research, Inc.	  	http://buffer.antifork.org



diff -Naur /usr/src/linux-2.4.24/include/linux/sysctl.h /usr/src/linux-2.4.24-westwood/include/linux/sysctl.h
--- /usr/src/linux-2.4.24/include/linux/sysctl.h	2003-12-02 20:49:12.000000000 +0100
+++ /usr/src/linux-2.4.24-westwood/include/linux/sysctl.h	2004-01-16 17:03:43.000000000 +0100
@@ -311,6 +311,7 @@
 	NET_TCP_FRTO=92,
 	NET_TCP_LOW_LATENCY=93,
 	NET_IPV4_IPFRAG_SECRET_INTERVAL=94,
+	NET_TCP_WESTWOOD=95,
 };
 
 enum {
diff -Naur /usr/src/linux-2.4.24/include/net/sock.h /usr/src/linux-2.4.24-westwood/include/net/sock.h
--- /usr/src/linux-2.4.24/include/net/sock.h	2004-01-05 18:27:30.000000000 +0100
+++ /usr/src/linux-2.4.24-westwood/include/net/sock.h	2004-01-16 18:29:56.000000000 +0100
@@ -432,6 +432,21 @@
 	__u32                   frto_highmark; /* snd_nxt when RTO occurred */
 
 	unsigned long last_synq_overflow; 
+
+/* TCP Westwood structure */
+        struct {
+                __u32    bw_sample;        /* bandwidth sample */
+                __u32    bw_ns_est;        /* first bandwidth estimation... not too smoothed 8) */
+                __u32    bw_est;           /* bandwidth estimate */
+                __u32    rtt_win_sx;       /* start of the current RTT window */
+                __u32    bk;               /* bytes acked in the current RTT window */
+                __u32    snd_una;          /* used for evaluating the number of acked bytes */
+                __u32    cumul_ack;        /* bytes acked by the last ACK */
+                __u32    accounted;        /* bytes already accounted for by dupacks */
+                __u32    rtt;              /* smoothed RTT sample, in jiffies */
+                __u32    rtt_min;          /* minimum observed RTT */
+                rwlock_t lock;             /* protects the fields above */
+        } westwood;
 };
 
  	
diff -Naur /usr/src/linux-2.4.24/include/net/tcp.h /usr/src/linux-2.4.24-westwood/include/net/tcp.h
--- /usr/src/linux-2.4.24/include/net/tcp.h	2004-01-05 18:28:51.000000000 +0100
+++ /usr/src/linux-2.4.24-westwood/include/net/tcp.h	2004-01-30 10:14:51.000000000 +0100
@@ -463,6 +463,7 @@
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
+extern int sysctl_tcp_westwood;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -1863,4 +1864,124 @@
 	TCP_ADD_STATS_USER(TcpMaxConn, -1);
 }
 
+
+/* TCP Westwood functions and constants */
+
+#define WESTWOOD_INIT_RTT               (20*HZ)         /* maybe too conservative?! */
+#define WESTWOOD_RTT_MIN                (HZ/20)         /* 50ms */
+
+
+static inline void westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq)
+{
+        if (sysctl_tcp_westwood)
+                tp->westwood.rtt = rtt_seq;
+}
+
+void __westwood_fast_bw(struct sock *, struct sk_buff *);
+void __westwood_slow_bw(struct sock *, struct sk_buff *);
+
+/*
+ * This function initializes the fields used by TCP Westwood.
+ * No RTT information is available at this time, so the bandwidth
+ * fields start at zero and the RTT fields at a conservative value.
+ */
+
+static inline void __init_westwood(struct sock *sk)
+{
+        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+        tp->westwood.bw_sample = 0;
+        tp->westwood.bw_ns_est = 0;
+        tp->westwood.bw_est = 0;
+        tp->westwood.accounted = 0;
+        tp->westwood.cumul_ack = 0;
+        tp->westwood.rtt_win_sx = tcp_time_stamp;
+        tp->westwood.rtt = WESTWOOD_INIT_RTT;
+        tp->westwood.rtt_min = WESTWOOD_INIT_RTT;
+        tp->westwood.snd_una = tp->snd_una;
+        tp->westwood.lock = RW_LOCK_UNLOCKED;
+}
+
+static inline void init_westwood(struct sock *sk)
+{
+	__init_westwood(sk);
+}
+
+static inline void westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
+{
+        if (sysctl_tcp_westwood)
+                __westwood_fast_bw(sk, skb);
+}
+
+static inline void westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
+{
+        if (sysctl_tcp_westwood)
+                __westwood_slow_bw(sk, skb);
+}
+
+static inline __u32 __westwood_bw_rttmin(struct tcp_opt *tp)
+{
+        return (__u32) ((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
+                        (__u32) (tp->mss_cache));
+}
+
+static inline __u32 westwood_bw_rttmin(struct tcp_opt *tp)
+{
+        __u32 ret = 0;
+	
+        if (sysctl_tcp_westwood)
+                ret = (__u32) (max(__westwood_bw_rttmin(tp), 2U));
+	
+        return ret;
+}
+
+static inline int westwood_ssthresh(struct tcp_opt *tp)
+{
+	int ret = 0;
+	__u32 ssthresh;
+	
+	if (sysctl_tcp_westwood) {
+
+		if (!(ssthresh = westwood_bw_rttmin(tp)))
+			return ret;
+
+		tp->snd_ssthresh = ssthresh;
+		ret = 1;
+	}
+	
+	return ret;
+}
+
+static inline int westwood_cwnd(struct tcp_opt *tp)
+{
+	int ret = 0;
+	__u32 cwnd;
+	
+	if (sysctl_tcp_westwood) {
+
+		if (!(cwnd = westwood_bw_rttmin(tp)))
+			return ret;
+
+		tp->snd_cwnd = cwnd;
+		ret = 1;
+	}
+	
+	return ret;
+}
+
+static inline int westwood_complete_cwr(struct tcp_opt *tp) 
+{
+	int ret = 0;
+	
+	if (sysctl_tcp_westwood) {
+		
+		if (westwood_cwnd(tp)) {
+			tp->snd_ssthresh = tp->snd_cwnd;
+			ret = 1;
+		}
+	}
+	
+	return ret;
+}
+
 #endif	/* _TCP_H */
diff -Naur /usr/src/linux-2.4.24/net/ipv4/sysctl_net_ipv4.c /usr/src/linux-2.4.24-westwood/net/ipv4/sysctl_net_ipv4.c
--- /usr/src/linux-2.4.24/net/ipv4/sysctl_net_ipv4.c	2003-06-13 16:51:39.000000000 +0200
+++ /usr/src/linux-2.4.24-westwood/net/ipv4/sysctl_net_ipv4.c	2004-01-16 18:49:37.000000000 +0100
@@ -52,6 +52,9 @@
 static int ip_local_port_range_max[] = { 65535, 65535 };
 #endif
 
+/* From tcp_input.c */
+extern int sysctl_tcp_westwood;
+
 struct ipv4_config ipv4_config;
 
 extern ctl_table ipv4_route_table[];
@@ -229,6 +232,9 @@
 	{NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval",
 	 &sysctl_ipfrag_secret_interval, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, 
 	 &sysctl_jiffies},
+	{NET_TCP_WESTWOOD, "tcp_westwood",
+	 &sysctl_tcp_westwood, sizeof(int), 0644, NULL,
+	 &proc_dointvec},
 	{0}
 };
 
diff -Naur /usr/src/linux-2.4.24/net/ipv4/tcp_input.c /usr/src/linux-2.4.24-westwood/net/ipv4/tcp_input.c
--- /usr/src/linux-2.4.24/net/ipv4/tcp_input.c	2003-12-02 20:37:57.000000000 +0100
+++ /usr/src/linux-2.4.24-westwood/net/ipv4/tcp_input.c	2004-01-30 10:16:02.000000000 +0100
@@ -61,6 +61,7 @@
  *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
  *					engine. Lots of bugs are found.
  *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
+ *		Angelo Dell'Aera:	TCP Westwood+ support
  */
 
 #include <linux/config.h>
@@ -89,6 +90,8 @@
 int sysctl_tcp_max_orphans = NR_FILE;
 int sysctl_tcp_frto = 0;
 
+int sysctl_tcp_westwood = 0;
+
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
 #define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
@@ -470,6 +473,8 @@
 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
 		tp->rtt_seq = tp->snd_nxt;
 	}
+
+	westwood_update_rtt(tp, tp->srtt >> 3);
 }
 
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
@@ -1068,7 +1073,9 @@
 	    tp->snd_una == tp->high_seq ||
 	    (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(tp);
-		tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+
+		if (!westwood_ssthresh(tp))
+			tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
 	}
 	tp->snd_cwnd = 1;
 	tp->snd_cwnd_cnt = 0;
@@ -1380,11 +1387,24 @@
 static void tcp_cwnd_down(struct tcp_opt *tp)
 {
 	int decr = tp->snd_cwnd_cnt + 1;
+	__u32 limit;
+
+	/*
+	 * TCP Westwood
+	 * Here limit is evaluated as BWestimation*RTTmin (to express it
+	 * in packets we divide by mss_cache). If TCP Westwood is disabled,
+	 * westwood_bw_rttmin() returns 0 and snd_ssthresh is used as
+	 * usual. This also guards against the strange case in which
+	 * BWE*RTTmin could evaluate to 0: it should not happen, but...
+	 */
+
+	if (!(limit = westwood_bw_rttmin(tp)))
+		limit = tp->snd_ssthresh/2;
 
 	tp->snd_cwnd_cnt = decr&1;
 	decr >>= 1;
 
-	if (decr && tp->snd_cwnd > tp->snd_ssthresh/2)
+	if (decr && tp->snd_cwnd > limit)
 		tp->snd_cwnd -= decr;
 
 	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
@@ -1528,7 +1548,8 @@
 
 static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
 {
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	if (!westwood_complete_cwr(tp))
+		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2016,6 +2037,260 @@
 	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
 
+/* 
+ * TCP Westwood
+ * Functions needed for estimating bandwidth.
+ */
+
+/*
+ * @westwood_do_filter
+ * Low-pass filter with constant coefficients: returns (7*a + b) / 8.
+ */
+
+static inline __u32 westwood_do_filter(__u32 a, __u32 b)
+{
+	return (7*a + b) >> 3;
+}
+
+static void westwood_filter(struct sock *sk, __u32 delta)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	__u32 sample = tp->westwood.bk / delta;	/* bytes per jiffy */
+
+	tp->westwood.bw_ns_est = westwood_do_filter(tp->westwood.bw_ns_est, sample);
+	tp->westwood.bw_est = westwood_do_filter(tp->westwood.bw_est, tp->westwood.bw_ns_est);
+	tp->westwood.bw_sample = sample;
+}
+
+/* @westwood_update_rttmin
+ * Used to update RTTmin. Here we MUST NOT apply the WESTWOOD_RTT_MIN
+ * lower bound, since we could be on a LAN!
+ */
+
+static inline __u32 westwood_update_rttmin(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	__u32 rttmin = tp->westwood.rtt_min;
+
+	if (tp->westwood.rtt == 0)
+		return rttmin;
+
+	if (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin)
+		rttmin = tp->westwood.rtt;
+
+	return rttmin;
+}
+
+/*
+ * @westwood_acked
+ * Evaluate increases for dk. No locking is required here: when this
+ * is called, the lock should already be held. Be careful about that!
+ */
+
+static __u32 westwood_acked(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	return tp->snd_una - tp->westwood.snd_una;
+}
+
+/*
+ * @westwood_new_window
+ * Evaluates whether we are receiving data inside the same RTT
+ * window that we started with.
+ * Return value:
+ * 0 if we are still evaluating samples in the same RTT window,
+ * 1 if the sample has to be considered in the next window.
+ */
+
+static int westwood_new_window(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	__u32 left_bound;
+	__u32 rtt;
+	int ret = 0;
+
+	read_lock(&tp->westwood.lock);
+	left_bound = tp->westwood.rtt_win_sx;
+	rtt = max(tp->westwood.rtt, (__u32)WESTWOOD_RTT_MIN);
+	read_unlock(&tp->westwood.lock);
+
+	/*
+	 * An RTT window has passed. Be careful: if RTT is less than
+	 * 50ms we don't filter but keep 'building the sample', since
+	 * an estimation over very small time intervals is better
+	 * avoided.
+	 * Obviously on a LAN we can reasonably expect
+	 * right_bound = left_bound + WESTWOOD_RTT_MIN.
+	 */
+
+	if ((left_bound + rtt) < tcp_time_stamp)
+		ret = 1;
+
+	return ret;
+}
+
+
+/*
+ * @westwood_update_window
+ * Updates the RTT-evaluation window if this is the right moment to
+ * do so, and in that case calls the filter to evaluate bandwidth.
+ * Be careful with __westwood_update_window(): it is called without
+ * any form of locking, so it is meant for internal use only.
+ * Call westwood_update_window() instead.
+ */
+
+
+static void __westwood_update_window(struct sock *sk, __u32 now)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	__u32 delta = now - tp->westwood.rtt_win_sx;
+
+	if (!delta)
+		return;
+
+	if (tp->westwood.rtt)
+		westwood_filter(sk, delta);
+
+	tp->westwood.bk = 0;
+	tp->westwood.rtt_win_sx = tcp_time_stamp;
+}
+
+
+static void westwood_update_window(struct sock *sk, __u32 now)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	if (westwood_new_window(sk)) {
+		write_lock(&tp->westwood.lock);
+		__westwood_update_window(sk, now);
+		write_unlock(&tp->westwood.lock);
+	}
+}
+
+/*
+ * @__westwood_fast_bw
+ * Called when we are in the fast path, in particular when header
+ * prediction is successful. In that case the update is
+ * straightforward and doesn't need any particular care.
+ */
+
+void __westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	westwood_update_window(sk, tcp_time_stamp);
+
+	write_lock(&tp->westwood.lock);
+	tp->westwood.bk += westwood_acked(sk); 
+	tp->westwood.snd_una = tp->snd_una;
+	tp->westwood.rtt_min = westwood_update_rttmin(sk);
+	write_unlock(&tp->westwood.lock);
+}
+
+/*
+ * @westwood_mss
+ * This function was inserted just to make it possible to evaluate
+ * which value of MSS works best. It is not yet clear which one to
+ * use here: only testing will tell!
+ */
+
+static inline __u32 westwood_mss(struct tcp_opt *tp)
+{
+	return (__u32) tp->mss_cache;
+}
+
+
+/*
+ * @westwood_dupack_update
+ * Updates accounted and cumul_ack when receiving a dupack.
+ */
+
+static void westwood_dupack_update(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	write_lock(&tp->westwood.lock);
+	tp->westwood.accounted += westwood_mss(tp);
+	tp->westwood.cumul_ack = westwood_mss(tp);
+	write_unlock(&tp->westwood.lock);
+}
+
+
+static inline int westwood_may_change_cumul(struct tcp_opt *tp)
+{
+	return ((tp->westwood.cumul_ack) > westwood_mss(tp));
+}
+
+
+static inline void westwood_partial_update(struct tcp_opt *tp)
+{
+	tp->westwood.accounted -= tp->westwood.cumul_ack;
+	tp->westwood.cumul_ack = westwood_mss(tp);
+}
+
+
+static inline void westwood_complete_update(struct tcp_opt *tp)
+{
+	tp->westwood.cumul_ack -= tp->westwood.accounted;
+	tp->westwood.accounted = 0;
+}
+ 
+/*
+ * @westwood_acked_count
+ * Evaluates cumul_ack, used to compute dk, in the case of delayed
+ * or partial acks.
+ */
+ 
+static __u32 westwood_acked_count(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	tp->westwood.cumul_ack = westwood_acked(sk);
+
+	/* If cumul_ack is 0 this is a dupack, since it is not moving
+	 * tp->snd_una.
+	 */
+
+	if (!tp->westwood.cumul_ack)
+		westwood_dupack_update(sk);
+
+	if (westwood_may_change_cumul(tp)) {
+		/* Partial or delayed ack */
+		if (tp->westwood.accounted >= tp->westwood.cumul_ack)
+			westwood_partial_update(tp);
+		else
+			westwood_complete_update(tp);
+	}
+
+	tp->westwood.snd_una = tp->snd_una;
+
+	return tp->westwood.cumul_ack;
+}
+
+
+/*
+ * @__westwood_slow_bw
+ * Called when something may be going wrong, even if there is
+ * actually no problem: a simple delayed packet may trigger a
+ * dupack. We need to be careful in such cases.
+ */
+
+void __westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+	westwood_update_window(sk, tcp_time_stamp);
+
+	write_lock(&tp->westwood.lock);	
+	tp->westwood.bk += westwood_acked_count(sk);
+	tp->westwood.rtt_min = westwood_update_rttmin(sk);
+	write_unlock(&tp->westwood.lock);
+}
+
+/* TCP Westwood routines end here */
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 {
@@ -2042,6 +2317,7 @@
 		 */
 		tcp_update_wl(tp, ack, ack_seq);
 		tp->snd_una = ack;
+		westwood_fast_bw(sk, skb);
 		flag |= FLAG_WIN_UPDATE;
 
 		NET_INC_STATS_BH(TCPHPAcks);
@@ -2058,6 +2334,8 @@
 
 		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
 			flag |= FLAG_ECE;
+
+		westwood_slow_bw(sk, skb);
 	}
 
 	/* We passed data and got it acked, remove any soft error
@@ -3796,7 +4074,6 @@
 	return 1;
 }
 
-
 /*
  *	This function implements the receiving procedure of RFC 793 for
  *	all states except ESTABLISHED and TIME_WAIT. 
@@ -3827,6 +4104,8 @@
 			if(tp->af_specific->conn_request(sk, skb) < 0)
 				return 1;
 
+			init_westwood(sk);
+
 			/* Now we have several options: In theory there is 
 			 * nothing else in the frame. KA9Q has an option to 
 			 * send data with the syn, BSD accepts data with the
@@ -3848,6 +4127,9 @@
 		goto discard;
 
 	case TCP_SYN_SENT:
+
+		init_westwood(sk);
+		
 		queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
 		if (queued >= 0)
 			return queued;