[PATCH 3/5 2.6.29-rc] cxgb3i -- tx pdu need to observe skb's MAX_SKB_FRAGS From: Karen Xie <kxie@xxxxxxxxxxx> While fitting the whole pdu into a skb, need to observe the max. # of fragments in skb (MAX_SKB_FRAGS). Allow max. payload size and check for # of scatter-gather entries it may use when allocating the pdus for transmit. If the data can not fit into the skb's frag list, copy the data. Signed-off-by: Karen Xie <kxie@xxxxxxxxxxx> --- drivers/scsi/cxgb3i/cxgb3i_pdu.c | 410 ++++++++++++++++++++++++++++++-------- drivers/scsi/cxgb3i/cxgb3i_pdu.h | 15 + 2 files changed, 337 insertions(+), 88 deletions(-) diff --git a/drivers/scsi/cxgb3i/cxgb3i_pdu.c b/drivers/scsi/cxgb3i/cxgb3i_pdu.c index ce7ce8c..165fb75 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_pdu.c +++ b/drivers/scsi/cxgb3i/cxgb3i_pdu.c @@ -32,6 +32,10 @@ #define cxgb3i_tx_debug(fmt...) #endif +#define SKB_TX_HEADROOM SKB_MAX_HEAD(TX_HEADER_LEN) +/* always allocate rooms for AHS */ +#define SKB_TX_PDU_HEADER_LEN \ + (sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE) static struct page *pad_page; /* @@ -146,12 +150,18 @@ static inline void tx_skb_setmode(struct sk_buff *skb, int hcrc, int dcrc) void cxgb3i_conn_cleanup_task(struct iscsi_task *task) { - struct iscsi_tcp_task *tcp_task = task->dd_data; + struct cxgb3i_task_data *tdata = task->dd_data + + sizeof(struct iscsi_tcp_task); + int i; /* never reached the xmit task callout */ - if (tcp_task->dd_data) - kfree_skb(tcp_task->dd_data); - tcp_task->dd_data = NULL; + for (i = 0; i < MAX_PDU_FULL_PAGES; i++) + if (tdata->pages[i]) + __free_page(tdata->pages[i]); + + if (tdata->skb) + __kfree_skb(tdata->skb); + memset(tdata, 0, sizeof(struct cxgb3i_task_data)); /* MNC - Do we need a check in case this is called but * cxgb3i_conn_alloc_pdu has never been called on the task */ @@ -159,27 +169,162 @@ void cxgb3i_conn_cleanup_task(struct iscsi_task *task) iscsi_tcp_cleanup_task(task); } -/* - * We do not support ahs yet - */ +static int sgl_seek_offset(struct scatterlist *sgl, unsigned int sgcnt, + unsigned int offset, unsigned int *off, + struct scatterlist **sgp) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, sgcnt, i) { + if (offset < sg->length) { + *off = offset; + *sgp = sg; + return 0; + } + offset -= sg->length; + } + return -EFAULT; +} + +static int sgl_read_to_frags(struct scatterlist *sg, unsigned int sgoffset, + unsigned int dlen, skb_frag_t *frags, + int frag_max) +{ + unsigned int datalen = dlen; + unsigned int sglen = sg->length - sgoffset; + struct page *page = sg_page(sg); + int i; + + i = 0; + do { + unsigned int copy; + + if (!sglen) { + sg = sg_next(sg); + if (!sg) { + cxgb3i_log_error("%s, sg NULL, len %u/%u.\n", + __func__, datalen, dlen); + return -EINVAL; + } + sgoffset = 0; + sglen = sg->length; + page = sg_page(sg); + + } + copy = min(datalen, sglen); + if (i && page == frags[i - 1].page && + sgoffset + sg->offset == + frags[i - 1].page_offset + frags[i - 1].size) { + frags[i - 1].size += copy; + } else { + if (i >= frag_max) { + cxgb3i_log_error("%s, too many pages %u, " + "dlen %u.\n", __func__, + frag_max, dlen); + return -EINVAL; + } + + frags[i].page = page; + frags[i].page_offset = sg->offset + sgoffset; + frags[i].size = copy; + i++; + } + datalen -= copy; + sgoffset += copy; + sglen -= copy; + } while (datalen); + + return i; +} + +static inline int task_data_fill_pages(struct cxgb3i_task_data *tdata) +{ + int i; + for (i = 0; i < tdata->nr_pages; i++) + if (!tdata->pages[i]) { + tdata->pages[i] = alloc_page(GFP_ATOMIC); + if (!tdata->pages[i]) + return -ENOMEM; + } + return 0; +} + int cxgb3i_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) { + struct iscsi_conn *conn = task->conn; struct iscsi_tcp_task *tcp_task = task->dd_data; - struct sk_buff *skb; + struct cxgb3i_task_data *tdata = task->dd_data + sizeof(*tcp_task); + struct scsi_cmnd *sc = task->sc; + int headroom = SKB_TX_PDU_HEADER_LEN; + int skb_flag = 0; + int err; + tcp_task->dd_data = tdata; task->hdr = NULL; - /* always allocate rooms for AHS */ - skb = alloc_skb(sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + - TX_HEADER_LEN, GFP_ATOMIC); - if (!skb) + + /* write command, need to send data pdus */ + if (sc && + (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_TO_DEVICE)) { + struct scsi_data_buffer *sdb = scsi_out(task->sc); + struct scatterlist *sg = NULL; + + /* + * calculate the pdu payload data. + * Since only MaxOutstandingR2T=1, DataSequenceInOrder=Yes, + * and DataPDUInOrder=Yes are supported, we can safely assume + * the data will be sent sequentially and each pdu will carry + * no more than conn->max_xmit_dlength bytes of payload. + */ + err = sgl_seek_offset(sdb->table.sgl, sdb->table.nents, + tdata->offset, &tdata->sgoffset, &sg); + if (err < 0) { + cxgb3i_log_warn("tpdu, sgl %u, bad offset %u/%u.\n", + sdb->table.nents, tdata->offset, + sdb->length); + return err; + } + tdata->count = min(sdb->length - tdata->offset, + conn->max_xmit_dlength); + err = sgl_read_to_frags(sg, tdata->sgoffset, tdata->count, + tdata->frags, MAX_PDU_FRAGS); + if (err < 0) { + cxgb3i_log_warn("tpdu, sgl %u, bad offset %u + %u.\n", + sdb->table.nents, tdata->offset, + tdata->count); + return err; + } + tdata->nr_frags = err; + /* + * make sure the payload can fit into one skb, if not we will + * copy the data either into skb headroom or into full pages. + */ + if (tdata->nr_frags >= MAX_SKB_FRAGS) { + if (SKB_TX_HEADROOM < + (headroom + tdata->count)) { + tdata->nr_pages = (tdata->count + + PAGE_SIZE - 1) >> PAGE_SHIFT; + err = task_data_fill_pages(tdata); + if (err < 0) + return err; + skb_flag = PDU_SKB_FLAG_COPY_PAGE; + } else { + headroom += tdata->count; + skb_flag = PDU_SKB_FLAG_COPY_HEAD; + } + } + } + + tdata->skb = alloc_skb(TX_HEADER_LEN + headroom, GFP_ATOMIC); + if (!tdata->skb) return -ENOMEM; + skb_reserve(tdata->skb, TX_HEADER_LEN); + PDU_SKB_CB(tdata->skb)->flags = skb_flag; cxgb3i_tx_debug("task 0x%p, opcode 0x%x, skb 0x%p.\n", - task, opcode, skb); + task, opcode, tdata->skb); - tcp_task->dd_data = skb; - skb_reserve(skb, TX_HEADER_LEN); - task->hdr = (struct iscsi_hdr *)skb->data; + task->hdr = (struct iscsi_hdr *)tdata->skb->data; task->hdr_max = sizeof(struct iscsi_hdr); /* data_out uses scsi_cmd's itt */ @@ -189,110 +334,193 @@ int cxgb3i_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) return 0; } +static int skb_copy_frags_to_pages(struct cxgb3i_task_data *tdata) +{ + struct sk_buff *skb = tdata->skb; + unsigned char *dst = page_address(tdata->pages[0]); + skb_frag_t *frag = tdata->frags; + unsigned char *src; + unsigned int pg_left = PAGE_SIZE; + unsigned int copy_total = 0; + int i, j; + + for (i = 0, j = 0; i < tdata->nr_frags; i++, frag++) { + unsigned int len = frag->size; + + src = page_address(frag->page) + frag->page_offset; + while (len) { + unsigned int copy; + + if (pg_left == PAGE_SIZE) { + tdata->pages[j] = NULL; + dst = page_address(tdata->pages[j]); + skb_fill_page_desc(skb, j, tdata->pages[j], + 0, 0); + } + + copy = min(pg_left, frag->size); + memcpy(dst, src, copy); + + skb_shinfo(skb)->frags[j].size += copy; + len -= copy; + src += copy; + dst += copy; + pg_left -= copy; + copy_total += copy; + + if (!pg_left) { + j++; + pg_left = PAGE_SIZE; + } + } + } + skb->len += copy_total; + skb->data_len += copy_total; + skb->truesize += copy_total; + + return copy_total; +} + + int cxgb3i_conn_init_pdu(struct iscsi_task *task, unsigned int offset, unsigned int count) { - struct iscsi_tcp_task *tcp_task = task->dd_data; - struct sk_buff *skb = tcp_task->dd_data; struct iscsi_conn *conn = task->conn; - struct page *pg; + struct iscsi_tcp_task *tcp_task = task->dd_data; + struct cxgb3i_task_data *tdata = tcp_task->dd_data; + struct sk_buff *skb = tdata->skb; unsigned int datalen = count; + u8 skb_flag = PDU_SKB_CB(skb)->flags; int i, padlen = iscsi_padding(count); - skb_frag_t *frag; + int err; + struct page *pg; cxgb3i_tx_debug("task 0x%p,0x%p, offset %u, count %u, skb 0x%p.\n", task, task->sc, offset, count, skb); + memset(PDU_SKB_CB(skb), 0, sizeof(struct pdu_skb_cb)); skb_put(skb, task->hdr_len); tx_skb_setmode(skb, conn->hdrdgst_en, datalen ? conn->datadgst_en : 0); if (!count) return 0; if (task->sc) { - struct scatterlist *sg; - struct scsi_data_buffer *sdb; - unsigned int sgoffset = offset; - struct page *sgpg; - unsigned int sglen; - - sdb = scsi_out(task->sc); - sg = sdb->table.sgl; - - for_each_sg(sdb->table.sgl, sg, sdb->table.nents, i) { - cxgb3i_tx_debug("sg %d, page 0x%p, len %u offset %u\n", - i, sg_page(sg), sg->length, sg->offset); - - if (sgoffset < sg->length) - break; - sgoffset -= sg->length; + struct scsi_data_buffer *sdb = scsi_out(task->sc); + + if (offset != tdata->offset) { + struct scatterlist *sg = NULL; + + tdata->offset = offset; + err = sgl_seek_offset(sdb->table.sgl, sdb->table.nents, + offset, &tdata->sgoffset, &sg); + if (err < 0) + return err; + err = sgl_read_to_frags(sg, tdata->sgoffset, + count, tdata->frags, + MAX_PDU_FRAGS); + if (err < 0) + return err; + tdata->nr_frags = err; + tdata->count = count; } - sgpg = sg_page(sg); - sglen = sg->length - sgoffset; - do { - int j = skb_shinfo(skb)->nr_frags; - unsigned int copy; + if (count < tdata->count) { + unsigned int dlen = count; + skb_frag_t *frag = tdata->frags; - if (!sglen) { - sg = sg_next(sg); - sgpg = sg_page(sg); - sgoffset = 0; - sglen = sg->length; - ++i; + for (i = 0; i < tdata->nr_frags; i++, frag++) { + if (dlen < frag->size) + break; + dlen -= frag->size; } - copy = min(sglen, datalen); - if (j && skb_can_coalesce(skb, j, sgpg, - sg->offset + sgoffset)) { - skb_shinfo(skb)->frags[j - 1].size += copy; + tdata->nr_frags = i; + frag->size = dlen; + } + + if (tdata->nr_frags > MAX_SKB_FRAGS || + (padlen && (tdata->nr_frags + 1) > MAX_SKB_FRAGS)) { + if (!skb_flag) { + cxgb3i_log_warn("frag %u, skb not set up %u.\n", + tdata->nr_frags, padlen); + return -EINVAL; + } + + if (skb_flag == PDU_SKB_FLAG_COPY_HEAD) { + char *dst = skb->data + task->hdr_len; + skb_frag_t *frag = tdata->frags; + + /* data fits in the skb's headroom */ + for (i = 0; i < tdata->nr_frags; i++, frag++) { + memcpy(dst, + page_address(frag->page) + + frag->page_offset, frag->size); + dst += frag->size; + } + if (padlen) { + memset(dst, 0, padlen); + padlen = 0; + } + skb_put(skb, count + padlen); } else { - get_page(sgpg); - skb_fill_page_desc(skb, j, sgpg, - sg->offset + sgoffset, copy); + err = skb_copy_frags_to_pages(tdata); + if (err != count) { + cxgb3i_log_warn("skb copy %u != %d.\n", + count, err); + return -EINVAL; + } } - sgoffset += copy; - sglen -= copy; - datalen -= copy; - } while (datalen); + } else { + /* data fit into frag_list */ + for (i = 0; i < tdata->nr_frags; i++) + get_page(tdata->frags[i].page); + + memcpy(skb_shinfo(skb)->frags, tdata->frags, + sizeof(skb_frag_t) * tdata->nr_frags); + skb_shinfo(skb)->nr_frags = tdata->nr_frags; + + skb->len += count; + skb->data_len += count; + skb->truesize += count; + } + + /* + * update the offset so we know where to start for the next + * alloc_pdu() + */ + tdata->offset = offset + count; + tdata->count = 0; } else { pg = virt_to_page(task->data); - while (datalen) { - i = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[i]; - - get_page(pg); - frag->page = pg; - frag->page_offset = 0; - frag->size = min((unsigned int)PAGE_SIZE, datalen); - - skb_shinfo(skb)->nr_frags++; - datalen -= frag->size; - pg++; - } + get_page(pg); + skb_fill_page_desc(skb, 0, pg, offset_in_page(task->data), + count); + skb->len += count; + skb->data_len += count; + skb->truesize += count; } if (padlen) { i = skb_shinfo(skb)->nr_frags; - frag = &skb_shinfo(skb)->frags[i]; - frag->page = pad_page; - frag->page_offset = 0; - frag->size = padlen; - skb_shinfo(skb)->nr_frags++; + get_page(pad_page); + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, pad_page, 0, + padlen); + + skb->data_len += padlen; + skb->truesize += padlen; + skb->len += padlen; } - datalen = count + padlen; - skb->data_len += datalen; - skb->truesize += datalen; - skb->len += datalen; return 0; } int cxgb3i_conn_xmit_pdu(struct iscsi_task *task) { - struct iscsi_tcp_task *tcp_task = task->dd_data; - struct sk_buff *skb = tcp_task->dd_data; struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data; struct cxgb3i_conn *cconn = tcp_conn->dd_data; + struct iscsi_tcp_task *tcp_task = task->dd_data; + struct cxgb3i_task_data *tdata = tcp_task->dd_data; + struct sk_buff *skb = tdata->skb; unsigned int datalen; int err; @@ -300,13 +528,14 @@ int cxgb3i_conn_xmit_pdu(struct iscsi_task *task) return 0; datalen = skb->data_len; - tcp_task->dd_data = NULL; + tdata->skb = NULL; err = cxgb3i_c3cn_send_pdus(cconn->cep->c3cn, skb); - cxgb3i_tx_debug("task 0x%p, skb 0x%p, len %u/%u, rv %d.\n", - task, skb, skb->len, skb->data_len, err); if (err > 0) { int pdulen = err; + cxgb3i_tx_debug("task 0x%p, skb 0x%p, len %u/%u, rv %d.\n", + task, skb, skb->len, skb->data_len, err); + if (task->conn->hdrdgst_en) pdulen += ISCSI_DIGEST_SIZE; if (datalen && task->conn->datadgst_en) @@ -325,7 +554,7 @@ int cxgb3i_conn_xmit_pdu(struct iscsi_task *task) return err; } /* reset skb to send when we are called again */ - tcp_task->dd_data = skb; + tdata->skb = skb; return -EAGAIN; } @@ -366,7 +595,9 @@ void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn) skb = skb_peek(&c3cn->receive_queue); while (!err && skb) { __skb_unlink(skb, &c3cn->receive_queue); - read += skb_ulp_pdulen(skb); + read += skb_rx_pdulen(skb); + cxgb3i_rx_debug("conn 0x%p, cn 0x%p, rx skb 0x%p, pdulen %u.\n", + conn, c3cn, skb, skb_rx_pdulen(skb)); err = cxgb3i_conn_read_pdu_skb(conn, skb); __kfree_skb(skb); skb = skb_peek(&c3cn->receive_queue); @@ -377,6 +608,11 @@ void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn) cxgb3i_c3cn_rx_credits(c3cn, read); } conn->rxdata_octets += read; + + if (err) { + cxgb3i_log_info("conn 0x%p rx failed err %d.\n", conn, err); + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + } } void cxgb3i_conn_tx_open(struct s3_conn *c3cn) diff --git a/drivers/scsi/cxgb3i/cxgb3i_pdu.h b/drivers/scsi/cxgb3i/cxgb3i_pdu.h index a3f685c..0d12892 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_pdu.h +++ b/drivers/scsi/cxgb3i/cxgb3i_pdu.h @@ -33,6 +33,19 @@ struct cpl_rx_data_ddp_norss { u32 ddp_status; }; +/** + * pdu_skb_cb - control block for transmit pdu skb information. + * + * @flag: see PDU_SKB_FLAG_xxx below + */ +#define PDU_SKB_FLAG_COPY_HEAD 0x1 +#define PDU_SKB_FLAG_COPY_PAGE 0x2 +struct pdu_skb_cb { + __u8 flags; +}; +#define PDU_SKB_CB(skb) ((struct pdu_skb_cb *)&((skb)->cb[0])) + + #define RX_DDP_STATUS_IPP_SHIFT 27 /* invalid pagepod */ #define RX_DDP_STATUS_TID_SHIFT 26 /* tid mismatch */ #define RX_DDP_STATUS_COLOR_SHIFT 25 /* color mismatch */ @@ -53,7 +66,7 @@ struct cpl_rx_data_ddp_norss { #define ULP2_FLAG_DCRC_ERROR 0x20 #define ULP2_FLAG_PAD_ERROR 0x40 -void cxgb3i_conn_closing(struct s3_conn *); +void cxgb3i_conn_closing(struct s3_conn *c3cn); void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn); void cxgb3i_conn_tx_open(struct s3_conn *c3cn); #endif -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html