[PATCH 2/5 2.6.29-rc] cxgb3i - transmit work-request fixes

Karen Xie <kxie@xxxxxxxxxxx> · Mon, 9 Feb 2009 18:10:16 -0800

[PATCH 2/5 2.6.29-rc] cxgb3i - transmit work-request fixes

From: Karen Xie <kxie@xxxxxxxxxxx>

- resize the work-request credit array to be based on the skb's MAX_SKB_FRAGS.
- split the skb cb into tx and rx portions.
- increase the default transmit window from 64KB to 128KB.
- stop queueing up outgoing pdus if the transmit window is full.

Signed-off-by: Karen Xie <kxie@xxxxxxxxxxx>
---

 drivers/scsi/cxgb3i/cxgb3i_offload.c |  146 ++++++++++++++++++++++++----------
 drivers/scsi/cxgb3i/cxgb3i_offload.h |   28 ++++---
 2 files changed, 121 insertions(+), 53 deletions(-)

diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index a865f1f..de3b3b6 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -23,19 +23,19 @@
 #include "cxgb3i_ddp.h"
 
 #ifdef __DEBUG_C3CN_CONN__
-#define c3cn_conn_debug         cxgb3i_log_info
+#define c3cn_conn_debug		cxgb3i_log_debug
 #else
 #define c3cn_conn_debug(fmt...)
 #endif
 
 #ifdef __DEBUG_C3CN_TX__
-#define c3cn_tx_debug         cxgb3i_log_debug
+#define c3cn_tx_debug		cxgb3i_log_debug
 #else
 #define c3cn_tx_debug(fmt...)
 #endif
 
 #ifdef __DEBUG_C3CN_RX__
-#define c3cn_rx_debug         cxgb3i_log_debug
+#define c3cn_rx_debug		cxgb3i_log_debug
 #else
 #define c3cn_rx_debug(fmt...)
 #endif
@@ -47,9 +47,9 @@ static int cxgb3_rcv_win = 256 * 1024;
 module_param(cxgb3_rcv_win, int, 0644);
 MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)");
 
-static int cxgb3_snd_win = 64 * 1024;
+static int cxgb3_snd_win = 128 * 1024;
 module_param(cxgb3_snd_win, int, 0644);
-MODULE_PARM_DESC(cxgb3_snd_win, "TCP send window in bytes (default=64KB)");
+MODULE_PARM_DESC(cxgb3_snd_win, "TCP send window in bytes (default=128KB)");
 
 static int cxgb3_rx_credit_thres = 10 * 1024;
 module_param(cxgb3_rx_credit_thres, int, 0644);
@@ -301,8 +301,8 @@ static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
 static void skb_entail(struct s3_conn *c3cn, struct sk_buff *skb,
 		       int flags)
 {
-	CXGB3_SKB_CB(skb)->seq = c3cn->write_seq;
-	CXGB3_SKB_CB(skb)->flags = flags;
+	skb_tcp_seq(skb) = c3cn->write_seq;
+	skb_flags(skb) = flags;
 	__skb_queue_tail(&c3cn->write_queue, skb);
 }
 
@@ -457,12 +457,9 @@ static unsigned int wrlen __read_mostly;
  * The number of WRs needed for an skb depends on the number of fragments
  * in the skb and whether it has any payload in its main body.  This maps the
  * length of the gather list represented by an skb into the # of necessary WRs.
- *
- * The max. length of an skb is controlled by the max pdu size which is ~16K.
- * Also, assume the min. fragment length is the sector size (512), then add
- * extra fragment counts for iscsi bhs and payload padding.
+ * The extra two fragments are for iscsi bhs and payload padding.
  */
-#define SKB_WR_LIST_SIZE	(16384/512 + 3)
+#define SKB_WR_LIST_SIZE	(MAX_SKB_FRAGS + 2)
 static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly;
 
 static void s3_init_wr_tab(unsigned int wr_len)
@@ -485,7 +482,7 @@ static void s3_init_wr_tab(unsigned int wr_len)
 
 static inline void reset_wr_list(struct s3_conn *c3cn)
 {
-	c3cn->wr_pending_head = NULL;
+	c3cn->wr_pending_head = c3cn->wr_pending_tail = NULL;
 }
 
 /*
@@ -496,7 +493,7 @@ static inline void reset_wr_list(struct s3_conn *c3cn)
 static inline void enqueue_wr(struct s3_conn *c3cn,
 			      struct sk_buff *skb)
 {
-	skb_wr_data(skb) = NULL;
+	skb_tx_wr_next(skb) = NULL;
 
 	/*
 	 * We want to take an extra reference since both us and the driver
@@ -509,10 +506,22 @@ static inline void enqueue_wr(struct s3_conn *c3cn,
 	if (!c3cn->wr_pending_head)
 		c3cn->wr_pending_head = skb;
 	else
-		skb_wr_data(skb) = skb;
+		skb_tx_wr_next(c3cn->wr_pending_tail) = skb;
 	c3cn->wr_pending_tail = skb;
 }
 
+static int count_pending_wrs(struct s3_conn *c3cn)
+{
+	int n = 0;
+	const struct sk_buff *skb = c3cn->wr_pending_head;
+
+	while (skb) {
+		n += skb->csum;
+		skb = skb_tx_wr_next(skb);
+	}
+	return n;
+}
+
 static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
 {
 	return c3cn->wr_pending_head;
@@ -529,8 +538,8 @@ static inline struct sk_buff *dequeue_wr(struct s3_conn *c3cn)
 
 	if (likely(skb)) {
 		/* Don't bother clearing the tail */
-		c3cn->wr_pending_head = skb_wr_data(skb);
-		skb_wr_data(skb) = NULL;
+		c3cn->wr_pending_head = skb_tx_wr_next(skb);
+		skb_tx_wr_next(skb) = NULL;
 	}
 	return skb;
 }
@@ -543,13 +552,14 @@ static void purge_wr_queue(struct s3_conn *c3cn)
 }
 
 static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb,
-				   int len)
+				   int len, int req_completion)
 {
 	struct tx_data_wr *req;
 
 	skb_reset_transport_header(skb);
 	req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA) |
+			(req_completion ? F_WR_COMPL : 0));
 	req->wr_lo = htonl(V_WR_TID(c3cn->tid));
 	req->sndseq = htonl(c3cn->snd_nxt);
 	/* len includes the length of any HW ULP additions */
@@ -592,7 +602,7 @@ static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion)
 
 	if (unlikely(c3cn->state == C3CN_STATE_CONNECTING ||
 		     c3cn->state == C3CN_STATE_CLOSE_WAIT_1 ||
-		     c3cn->state == C3CN_STATE_ABORTING)) {
+		     c3cn->state >= C3CN_STATE_ABORTING)) {
 		c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n",
 			      c3cn, c3cn->state);
 		return 0;
@@ -615,7 +625,7 @@ static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion)
 		if (c3cn->wr_avail < wrs_needed) {
 			c3cn_tx_debug("c3cn 0x%p, skb len %u/%u, frag %u, "
 				      "wr %d < %u.\n",
-				      c3cn, skb->len, skb->datalen, frags,
+				      c3cn, skb->len, skb->data_len, frags,
 				      wrs_needed, c3cn->wr_avail);
 			break;
 		}
@@ -627,20 +637,24 @@ static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion)
 		c3cn->wr_unacked += wrs_needed;
 		enqueue_wr(c3cn, skb);
 
-		if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
-			len += ulp_extra_len(skb);
-			make_tx_data_wr(c3cn, skb, len);
-			c3cn->snd_nxt += len;
-			if ((req_completion
-			     && c3cn->wr_unacked == wrs_needed)
-			    || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
-			    || c3cn->wr_unacked >= c3cn->wr_max / 2) {
-				struct work_request_hdr *wr = cplhdr(skb);
+		c3cn_tx_debug("c3cn 0x%p, enqueue, skb len %u/%u, frag %u, "
+				"wr %d, left %u, unack %u.\n",
+				c3cn, skb->len, skb->data_len, frags,
+				wrs_needed, c3cn->wr_avail, c3cn->wr_unacked);
+
 
-				wr->wr_hi |= htonl(F_WR_COMPL);
+		if (likely(skb_flags(skb) & C3CB_FLAG_NEED_HDR)) {
+			if ((req_completion &&
+				c3cn->wr_unacked == wrs_needed) ||
+			    (skb_flags(skb) & C3CB_FLAG_COMPL) ||
+			    c3cn->wr_unacked >= c3cn->wr_max / 2) {
+				req_completion = 1;
 				c3cn->wr_unacked = 0;
 			}
-			CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
+			len += ulp_extra_len(skb);
+			make_tx_data_wr(c3cn, skb, len, req_completion);
+			c3cn->snd_nxt += len;
+			skb_flags(skb) &= ~C3CB_FLAG_NEED_HDR;
 		}
 
 		total_size += skb->truesize;
@@ -735,8 +749,11 @@ static void process_act_establish(struct s3_conn *c3cn, struct sk_buff *skb)
 	if (unlikely(c3cn_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED)))
 		/* upper layer has requested closing */
 		send_abort_req(c3cn);
-	else if (c3cn_push_tx_frames(c3cn, 1))
+	else {
+		if (skb_queue_len(&c3cn->write_queue))
+			c3cn_push_tx_frames(c3cn, 1);
 		cxgb3i_conn_tx_open(c3cn);
+	}
 }
 
 static int do_act_establish(struct t3cdev *cdev, struct sk_buff *skb,
@@ -1082,8 +1099,8 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, struct sk_buff *skb)
 		return;
 	}
 
-	CXGB3_SKB_CB(skb)->seq = ntohl(hdr_cpl->seq);
-	CXGB3_SKB_CB(skb)->flags = 0;
+	skb_tcp_seq(skb) = ntohl(hdr_cpl->seq);
+	skb_flags(skb) = 0;
 
 	skb_reset_transport_header(skb);
 	__skb_pull(skb, sizeof(struct cpl_iscsi_hdr));
@@ -1103,12 +1120,12 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, struct sk_buff *skb)
 		goto abort_conn;
 
 	skb_ulp_mode(skb) = ULP2_FLAG_DATA_READY;
-	skb_ulp_pdulen(skb) = ntohs(ddp_cpl.len);
-	skb_ulp_ddigest(skb) = ntohl(ddp_cpl.ulp_crc);
+	skb_rx_pdulen(skb) = ntohs(ddp_cpl.len);
+	skb_rx_ddigest(skb) = ntohl(ddp_cpl.ulp_crc);
 	status = ntohl(ddp_cpl.ddp_status);
 
 	c3cn_rx_debug("rx skb 0x%p, len %u, pdulen %u, ddp status 0x%x.\n",
-		      skb, skb->len, skb_ulp_pdulen(skb), status);
+		      skb, skb->len, skb_rx_pdulen(skb), status);
 
 	if (status & (1 << RX_DDP_STATUS_HCRC_SHIFT))
 		skb_ulp_mode(skb) |= ULP2_FLAG_HCRC_ERROR;
@@ -1126,7 +1143,7 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, struct sk_buff *skb)
 	} else if (status & (1 << RX_DDP_STATUS_DDP_SHIFT))
 		skb_ulp_mode(skb) |= ULP2_FLAG_DATA_DDPED;
 
-	c3cn->rcv_nxt = ntohl(ddp_cpl.seq) + skb_ulp_pdulen(skb);
+	c3cn->rcv_nxt = ntohl(ddp_cpl.seq) + skb_rx_pdulen(skb);
 	__pskb_trim(skb, len);
 	__skb_queue_tail(&c3cn->receive_queue, skb);
 	cxgb3i_conn_pdu_ready(c3cn);
@@ -1151,12 +1168,27 @@ static int do_iscsi_hdr(struct t3cdev *t3dev, struct sk_buff *skb, void *ctx)
  * Process an acknowledgment of WR completion.  Advance snd_una and send the
  * next batch of work requests from the write queue.
  */
+static void check_wr_invariants(struct s3_conn *c3cn)
+{
+	int pending = count_pending_wrs(c3cn);
+
+	if (unlikely(c3cn->wr_avail + pending != c3cn->wr_max))
+		cxgb3i_log_error("TID %u: credit imbalance: avail %u, "
+				"pending %u, total should be %u\n",
+				c3cn->tid, c3cn->wr_avail, pending,
+				c3cn->wr_max);
+}
+
 static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
 {
 	struct cpl_wr_ack *hdr = cplhdr(skb);
 	unsigned int credits = ntohs(hdr->credits);
 	u32 snd_una = ntohl(hdr->snd_una);
 
+	c3cn_tx_debug("%u WR credits, avail %u, unack %u, TID %u, state %u.\n",
+			credits, c3cn->wr_avail, c3cn->wr_unacked,
+			c3cn->tid, c3cn->state);
+
 	c3cn->wr_avail += credits;
 	if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
 		c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;
@@ -1171,6 +1203,17 @@ static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
 			break;
 		}
 		if (unlikely(credits < p->csum)) {
+			struct tx_data_wr *w = cplhdr(p);
+			cxgb3i_log_error("TID %u got %u WR credits need %u, "
+					 "len %u, main body %u, frags %u, "
+					 "seq # %u, ACK una %u, ACK nxt %u, "
+					 "WR_AVAIL %u, WRs pending %u\n",
+					 c3cn->tid, credits, p->csum, p->len,
+					 p->len - p->data_len,
+					 skb_shinfo(p)->nr_frags,
+					 ntohl(w->sndseq), snd_una,
+					 ntohl(hdr->snd_nxt), c3cn->wr_avail,
+					 count_pending_wrs(c3cn) - credits);
 			p->csum -= credits;
 			break;
 		} else {
@@ -1180,15 +1223,24 @@ static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
 		}
 	}
 
-	if (unlikely(before(snd_una, c3cn->snd_una)))
+	check_wr_invariants(c3cn);
+
+	if (unlikely(before(snd_una, c3cn->snd_una))) {
+		cxgb3i_log_error("TID %u, unexpected sequence # %u in WR_ACK "
+				 "snd_una %u\n",
+				 c3cn->tid, snd_una, c3cn->snd_una);
 		goto out_free;
+	}
 
 	if (c3cn->snd_una != snd_una) {
 		c3cn->snd_una = snd_una;
 		dst_confirm(c3cn->dst_cache);
 	}
 
-	if (skb_queue_len(&c3cn->write_queue) && c3cn_push_tx_frames(c3cn, 0))
+	if (skb_queue_len(&c3cn->write_queue)) {
+		if (c3cn_push_tx_frames(c3cn, 0))
+			cxgb3i_conn_tx_open(c3cn);
+	} else
 		cxgb3i_conn_tx_open(c3cn);
 out_free:
 	__kfree_skb(skb);
@@ -1452,7 +1504,7 @@ static void init_offload_conn(struct s3_conn *c3cn,
 			      struct dst_entry *dst)
 {
 	BUG_ON(c3cn->cdev != cdev);
-	c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs;
+	c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs - 1;
 	c3cn->wr_unacked = 0;
 	c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst));
 
@@ -1671,9 +1723,17 @@ int cxgb3i_c3cn_send_pdus(struct s3_conn *c3cn, struct sk_buff *skb)
 		goto out_err;
 	}
 
-	err = -EPIPE;
 	if (c3cn->err) {
 		c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err);
+		err = -EPIPE;
+		goto out_err;
+	}
+
+	if (c3cn->write_seq - c3cn->snd_una >= cxgb3_snd_win) {
+		c3cn_tx_debug("c3cn 0x%p, snd %u - %u > %u.\n",
+				c3cn, c3cn->write_seq, c3cn->snd_una,
+				cxgb3_snd_win);
+		err = -EAGAIN;
 		goto out_err;
 	}
 
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h
index d231569..df1eae0 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.h
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h
@@ -178,25 +178,33 @@ void cxgb3i_c3cn_release(struct s3_conn *);
  * @flag:	see C3CB_FLAG_* below
  * @ulp_mode:	ULP mode/submode of sk_buff
  * @seq:	tcp sequence number
- * @ddigest:	pdu data digest
- * @pdulen:	recovered pdu length
- * @wr_data:	scratch area for tx wr
  */
+struct cxgb3_skb_rx_cb {
+	__u32 ddigest;			/* data digest */
+	__u32 pdulen;			/* recovered pdu length */
+};
+
+struct cxgb3_skb_tx_cb {
+	struct sk_buff *wr_next;	/* next wr */
+};
+
 struct cxgb3_skb_cb {
 	__u8 flags;
 	__u8 ulp_mode;
 	__u32 seq;
-	__u32 ddigest;
-	__u32 pdulen;
-	struct sk_buff *wr_data;
+	union {
+		struct cxgb3_skb_rx_cb rx;
+		struct cxgb3_skb_tx_cb tx;
+	};
 };
 
 #define CXGB3_SKB_CB(skb)	((struct cxgb3_skb_cb *)&((skb)->cb[0]))
-
+#define skb_flags(skb)		(CXGB3_SKB_CB(skb)->flags)
 #define skb_ulp_mode(skb)	(CXGB3_SKB_CB(skb)->ulp_mode)
-#define skb_ulp_ddigest(skb)	(CXGB3_SKB_CB(skb)->ddigest)
-#define skb_ulp_pdulen(skb)	(CXGB3_SKB_CB(skb)->pdulen)
-#define skb_wr_data(skb)	(CXGB3_SKB_CB(skb)->wr_data)
+#define skb_tcp_seq(skb)	(CXGB3_SKB_CB(skb)->seq)
+#define skb_rx_ddigest(skb)	(CXGB3_SKB_CB(skb)->rx.ddigest)
+#define skb_rx_pdulen(skb)	(CXGB3_SKB_CB(skb)->rx.pdulen)
+#define skb_tx_wr_next(skb)	(CXGB3_SKB_CB(skb)->tx.wr_next)
 
 enum c3cb_flags {
 	C3CB_FLAG_NEED_HDR = 1 << 0,	/* packet needs a TX_DATA_WR header */
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html