[PATCH 4/17] [DCCP]: Ackvec patches by Bittau

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



These patches were written by Andrea Bittau and 
were posted and signed-off by him as 

	 01_ackvec_opt
	 02_ackvec_speed
	 03_ackvec_ccid_speed

Given that he achieved Gigabit speed with these patches, I think they
should really be `in'.


Original commit messages:
=========================

[ACKVEC]: Split long ack vectors across multiple options
  ack vectors grow proportional to the window size.  If an ack vector does not fit
  into a single option, it must be spread across multiple options.  This patch
  will allow for windows to grow larger.
  http://www.mail-archive.com/dccp@xxxxxxxxxxxxxxx/msg00399.html

[ACKVEC - Optimization]:  Do not traverse records if none will be found
  Do not traverse the list of ack vector records [proportional to window size]
  when we know we will not find what we are looking for.  This is especially
  useful because ack vectors are checked twice:
    1) Upon parsing of options.
    2) Upon notification of a new ack.

  All of the work will occur during check #1.  Therefore, when check #2 is
  performed, no new work will be done.  This is now "detected" and there is no
  performance hit when doing #2.
  http://www.mail-archive.com/dccp@xxxxxxxxxxxxxxx/msg00400.html

[ACKVEC & CCID2]: Code optimizations
   These are code optimizations which are relevant when dealing with large windows.
   They are not coded the way I would like to, but they do the job for the
   short-term.  This patch should be more neat.
   http://www.mail-archive.com/dccp@xxxxxxxxxxxxxxx/msg00401.html


Signed-off-by: Gerrit Renker  <gerrit@xxxxxxxxxxxxxx>
------------------------------------------------------------------------------

 net/dccp/ackvec.c      |  115 ++++++++++++++++++++++++++++++-------------------
 net/dccp/ackvec.h      |   22 ++++-----
 net/dccp/ccids/ccid2.c |   22 ++++++++-
 net/dccp/ccids/ccid2.h |    3 -
 net/dccp/options.c     |    7 +-
 5 files changed, 109 insertions(+), 60 deletions(-)

------------------------------------------------------------------------------

diff --git a/net/dccp/options.c b/net/dccp/options.c
index fb0db1f..81c53e1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -76,6 +76,7 @@ #endif
 	u32 elapsed_time;
 	int rc;
 	int mandatory = 0;
+	u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 
 	memset(opt_recv, 0, sizeof(*opt_recv));
 
@@ -152,9 +153,9 @@ #endif
 			if (pkt_type == DCCP_PKT_DATA)
 				break;
 
-			if (dccp_msk(sk)->dccpms_send_ack_vector &&
-			    dccp_ackvec_parse(sk, skb, opt, value, len))
-				goto out_invalid_option;
+			if (dccp_msk(sk)->dccpms_send_ack_vector)
+				ackno = dccp_ackvec_parse(sk, ackno, opt,
+							  value, len);
 			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 5b2ef4a..ebd7949 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,7 +35,7 @@ struct ccid2_seq {
 	struct ccid2_seq	*ccid2s_next;
 };
 
-#define CCID2_SEQBUF_LEN 256
+#define CCID2_SEQBUF_LEN 1024
 #define CCID2_SEQBUF_MAX 128
 
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
@@ -72,6 +72,7 @@ struct ccid2_hc_tx_sock {
 	int			ccid2hctx_rpdupack;
 	int			ccid2hctx_sendwait;
 	unsigned long		ccid2hctx_last_cong;
+	u64			ccid2hctx_high_ack;
 };
 
 struct ccid2_hc_rx_sock {
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 2fbb84b..e0f85e9 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -618,7 +618,17 @@ static void ccid2_hc_tx_packet_recv(stru
 	}
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
-	seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+	if (ackno > hctx->ccid2hctx_high_ack)
+		hctx->ccid2hctx_high_ack = ackno;
+
+	seqp = hctx->ccid2hctx_seqt;
+	while (before48(seqp->ccid2s_seq, ackno)) {
+		seqp = seqp->ccid2s_next;
+		if (seqp == hctx->ccid2hctx_seqh) {
+			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+			break;
+		}
+	}
 
 	/* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
 	 * this single ack.  I round up.
@@ -695,7 +705,14 @@ static void ccid2_hc_tx_packet_recv(stru
 	/* The state about what is acked should be correct now
 	 * Check for NUMDUPACK
 	 */
-	seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+	seqp = hctx->ccid2hctx_seqt;
+	while (seqp->ccid2s_seq < hctx->ccid2hctx_high_ack) {
+		seqp = seqp->ccid2s_next;
+		if (seqp == hctx->ccid2hctx_seqh) {
+			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+			break;
+		}
+	}
 	done = 0;
 	while (1) {
 		if (seqp->ccid2s_acked) {
@@ -769,6 +786,7 @@ static int ccid2_hc_tx_init(struct ccid 
 	hctx->ccid2hctx_lastrtt  = 0;
 	hctx->ccid2hctx_rpdupack = -1;
 	hctx->ccid2hctx_last_cong = jiffies;
+	hctx->ccid2hctx_high_ack = 0;
 
 	hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
 	hctx->ccid2hctx_rtotimer.data	  = (unsigned long)sk;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index cf8f20c..da1bec8 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -17,7 +17,9 @@ #include <linux/time.h>
 #include <linux/types.h>
 
 /* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_LEN 253
+#define DCCP_MAX_ACKVEC_OPT_LEN 253
+/* We can spread an ack vector across multiple options */
+#define DCCP_MAX_ACKVEC_LEN     (DCCP_MAX_ACKVEC_OPT_LEN * 2)
 
 #define DCCP_ACKVEC_STATE_RECEIVED	0
 #define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
@@ -31,7 +33,7 @@ #define DCCP_ACKVEC_LEN_MASK		0x3F /* 00
  * This data structure is the one defined in RFC 4340, Appendix A.
  *
  * @dccpav_buf_head - circular buffer head
- * @dccpav_buf_tail - circular buffer tail
+ * @dccpav_vec_len -
  * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
  * 		       buffer (i.e. %dccpav_buf_head)
  * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
@@ -41,20 +43,19 @@ #define DCCP_ACKVEC_LEN_MASK		0x3F /* 00
  * Ack Vectors it has recently sent. For each packet sent carrying an
  * Ack Vector, it remembers four variables:
  *
- * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
  * @dccpav_records - list of dccp_ackvec_record
+ * @dccpav_buf_nonce -
  * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
  *
- * @dccpav_time		- the time in usecs
+ * @dccpav_time	- the time in usecs
  * @dccpav_buf - circular buffer of acknowledgeable packets
  */
 struct dccp_ackvec {
 	u64		dccpav_buf_ackno;
 	struct list_head dccpav_records;
 	struct timeval	dccpav_time;
-	u8		dccpav_buf_head;
-	u8		dccpav_ack_ptr;
-	u8		dccpav_vec_len;
+	int		dccpav_buf_head;
+	int		dccpav_vec_len;
 	u8		dccpav_buf_nonce;
 	u8		dccpav_ack_nonce;
 	u8		dccpav_buf[DCCP_MAX_ACKVEC_LEN];
@@ -77,9 +78,9 @@ struct dccp_ackvec_record {
 	struct list_head dccpavr_node;
 	u64		 dccpavr_ack_seqno;
 	u64		 dccpavr_ack_ackno;
-	u8		 dccpavr_ack_ptr;
+	int		 dccpavr_ack_ptr;
 	u8		 dccpavr_ack_nonce;
-	u8		 dccpavr_sent_len;
+	int		 dccpavr_sent_len;
 };
 
 struct sock;
@@ -94,10 +95,9 @@ extern void dccp_ackvec_free(struct dccp
 
 extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 			   const u64 ackno, const u8 state);
-
 extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
 					struct sock *sk, const u64 ackno);
-extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+extern u64 dccp_ackvec_parse(struct sock *sk, u64 ackno,
 			     const u8 opt, const u8 *value, const u8 len);
 
 extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 01345df..f18f76e 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -72,11 +72,18 @@ #ifdef CONFIG_IP_DCCP_DEBUG
 				"CLIENT tx: " : "server tx: ";
 #endif
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
-	int len = av->dccpav_vec_len + 2;
+	int len;
 	struct timeval now;
 	u32 elapsed_time;
 	unsigned char *to, *from;
 	struct dccp_ackvec_record *avr;
+	int optc, i;
+
+	/* Figure out how many options do we need to represent the ackvec */
+	optc = av->dccpav_vec_len / DCCP_MAX_ACKVEC_OPT_LEN;
+	if (av->dccpav_vec_len % DCCP_MAX_ACKVEC_OPT_LEN)
+		optc++;
+	len = optc*2 + av->dccpav_vec_len;
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
 		return -1;
@@ -94,24 +101,42 @@ #endif
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
 
-	to    = skb_push(skb, len);
-	*to++ = DCCPO_ACK_VECTOR_0;
-	*to++ = len;
-
-	len  = av->dccpav_vec_len;
+	to   = skb_push(skb, len);
 	from = av->dccpav_buf + av->dccpav_buf_head;
+	len  = av->dccpav_vec_len;
+
+	/* write the vectors */
+	for (i = 0; i < optc; i++) {
+		int copylen = len;
+		if (copylen > DCCP_MAX_ACKVEC_OPT_LEN)
+			copylen = DCCP_MAX_ACKVEC_OPT_LEN;
+
+		*to++ = DCCPO_ACK_VECTOR_0;
+		*to++ = copylen+2;
 
-	/* Check if buf_head wraps */
-	if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) {
-		const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head;
+		/* Check if buf_head wraps */
+		if ((from + copylen) > &av->dccpav_buf[DCCP_MAX_ACKVEC_LEN]) {
+			u32 tailsize = &av->dccpav_buf[DCCP_MAX_ACKVEC_LEN] -
+				       from;
 
-		memcpy(to, from, tailsize);
-		to   += tailsize;
-		len  -= tailsize;
-		from = av->dccpav_buf;
+			memcpy(to, from, tailsize);
+			to	+= tailsize;
+			from     = av->dccpav_buf;
+			copylen	-= tailsize;
+			len     -= tailsize;
+
+			BUG_ON((from + copylen) >
+			       &av->dccpav_buf[DCCP_MAX_ACKVEC_LEN]);
+		}
+
+		memcpy(to, from, copylen);
+		to   += copylen;
+		from += copylen;
+
+		len -= copylen;
 	}
+	BUG_ON(len != 0);
 
-	memcpy(to, from, len);
 	/*
 	 *	From RFC 4340, A.2:
 	 *
@@ -145,7 +170,6 @@ struct dccp_ackvec *dccp_ackvec_alloc(co
 		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
 		av->dccpav_buf_ackno	= DCCP_MAX_SEQNO + 1;
 		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
-		av->dccpav_ack_ptr	= 0;
 		av->dccpav_time.tv_sec	= 0;
 		av->dccpav_time.tv_usec	= 0;
 		av->dccpav_vec_len	= 0;
@@ -174,13 +198,13 @@ void dccp_ackvec_free(struct dccp_ackvec
 }
 
 static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const u8 index)
+				   const int index)
 {
 	return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
 }
 
 static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const u8 index)
+				 const int index)
 {
 	return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
 }
@@ -280,7 +304,7 @@ int dccp_ackvec_add(struct dccp_ackvec *
 		 *	could reduce the complexity of this scan.)
 		 */
 		u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
-		u8 index = av->dccpav_buf_head;
+		int index = av->dccpav_buf_head;
 
 		while (1) {
 			const u8 len = dccp_ackvec_len(av, index);
@@ -389,30 +413,35 @@ #endif
 				      (unsigned long long)avr->dccpavr_ack_ackno);
 			dccp_ackvec_throw_record(av, avr);
 			break;
-		}
+		} else if (avr->dccpavr_ack_seqno > ackno)
+			break; /* old news */
 	}
 }
 
-static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
-					    struct sock *sk, u64 ackno,
-					    const unsigned char len,
-					    const unsigned char *vector)
+static u64 dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
+					   struct sock *sk, u64 ackno,
+					   const unsigned char len,
+					   const unsigned char *vector)
 {
 	unsigned char i;
 	struct dccp_ackvec_record *avr;
+	int use_head = 1;
 
 	/* Check if we actually sent an ACK vector */
 	if (list_empty(&av->dccpav_records))
-		return;
+		return ackno; /* It's OK, because we will always return */
 
 	i = len;
-	/*
-	 * XXX
-	 * I think it might be more efficient to work backwards. See comment on
-	 * rcv_ackno. -sorbo.
-	 */
-	avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record,
-			 dccpavr_node);
+	list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
+		if (!before48(avr->dccpavr_ack_seqno, ackno)) {
+			use_head = 0;
+			break;
+		}
+	}
+	if (use_head)
+		avr = list_entry(av->dccpav_records.next,
+				 struct dccp_ackvec_record, dccpavr_node);
+
 	while (i--) {
 		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
 		u64 ackno_end_rl;
@@ -429,7 +458,10 @@ static void dccp_ackvec_check_rcv_ackvec
 				goto found;
 		}
 		/* End of the dccpav_records list, not found, exit */
-		break;
+		break; /* Do not need to return a correct ackno because the next
+		        * batch of ackvectors will not make situation better
+			* (their acknos will be lower)
+			*/
 found:
 		if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
 			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
@@ -449,30 +481,27 @@ #endif
 					      (unsigned long long)
 					      avr->dccpavr_ack_ackno);
 				dccp_ackvec_throw_record(av, avr);
-				break;
+				break; /* Don't care about other ACKs because we
+				        * released the most possible state
+					*/
 			}
 			/*
 			 * If it wasn't received, continue scanning... we might
 			 * find another one.
 			 */
 		}
-
 		dccp_set_seqno(&ackno, ackno_end_rl - 1);
 		++vector;
 	}
+	return ackno;
 }
 
-int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-		      const u8 opt, const u8 *value, const u8 len)
+inline u64 dccp_ackvec_parse(struct sock *sk, u64 ackno,
+				    const u8 opt, const u8 *value, const u8 len)
 {
-	if (len > DCCP_MAX_ACKVEC_LEN)
-		return -1;
-
 	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
-	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
-					DCCP_SKB_CB(skb)->dccpd_ack_seq,
-				        len, value);
-	return 0;
+	return dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec,
+					       sk, ackno, len, value);
 }
 
 static char dccp_ackvec_slab_msg[] __initdata =


-
To unsubscribe from this list: send the line "unsubscribe dccp" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel]     [IETF DCCP]     [Linux Networking]     [Git]     [Security]     [Linux Assembly]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux