[PATCH 2/3 2.6.29] cxgb3i -- get rid of GFP_NOFAIL From: Karen Xie <kxie@xxxxxxxxxxx> - get rid of GFP_NOFAIL in memory allocation calls - move the port map to be per-adapter. - fixed maximum number of skb frag calculations to be based on the sector size instead of page size. - grouped related code together in functional blocks. - added more connection states to better track the state transitions. - added more comments Signed-off-by: Karen Xie <kxie@xxxxxxxxxxx> --- drivers/scsi/cxgb3i/cxgb3i.h | 2 drivers/scsi/cxgb3i/cxgb3i_init.c | 2 drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 4 drivers/scsi/cxgb3i/cxgb3i_offload.c | 2549 ++++++++++++++++------------------ drivers/scsi/cxgb3i/cxgb3i_offload.h | 247 ++- 5 files changed, 1301 insertions(+), 1503 deletions(-) diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h index 1c2301e..2d541a5 100644 --- a/drivers/scsi/cxgb3i/cxgb3i.h +++ b/drivers/scsi/cxgb3i/cxgb3i.h @@ -86,7 +86,7 @@ struct cxgb3i_adapter { * struct cxgb3i_conn - cxgb3i iscsi connection * * @listhead: list head to link elements - * @conn: pointer to iscsi_endpoint structure + * @cep: pointer to iscsi_endpoint structure * @conn: pointer to iscsi_conn structure * @hba: pointer to the hba this conn. 
is going through * @task_idx_bits: # of bits needed for session->cmds_max diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c index 236d5e1..6703682 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_init.c +++ b/drivers/scsi/cxgb3i/cxgb3i_init.c @@ -48,7 +48,6 @@ static void open_s3_dev(struct t3cdev *t3dev) vers_printed = 1; } - cxgb3i_log_debug("open cxgb3 %s.\n", t3dev->name); cxgb3i_sdev_add(t3dev, &t3c_client); cxgb3i_adapter_add(t3dev); } @@ -59,7 +58,6 @@ static void open_s3_dev(struct t3cdev *t3dev) */ static void close_s3_dev(struct t3cdev *t3dev) { - cxgb3i_log_debug("close cxgb3 %s.\n", t3dev->name); cxgb3i_adapter_remove(t3dev); cxgb3i_sdev_remove(t3dev); } diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 207c182..cf12eb1 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -253,7 +253,7 @@ static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, cxgb3i_log_info("NOT going through cxgbi device.\n"); goto release_conn; } - if (c3cn_in_state(c3cn, C3CN_STATE_CLOSE)) { + if (c3cn_is_closing(c3cn)) { err = -ENOSPC; cxgb3i_log_info("ep connect unable to connect.\n"); goto release_conn; @@ -292,7 +292,7 @@ static int cxgb3i_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) struct cxgb3i_endpoint *cep = ep->dd_data; struct s3_conn *c3cn = cep->c3cn; - if (!c3cn_in_state(c3cn, C3CN_STATE_ESTABLISHED)) + if (!c3cn_is_established(c3cn)) return 0; cxgb3i_api_debug("ep 0x%p, c3cn 0x%p established.\n", ep, c3cn); return 1; diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c index 8d0b3bf..06cc7c9 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_offload.c +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c @@ -1,12 +1,15 @@ /* - * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. 
+ * cxgb3i_offload.c: Chelsio S3xx iscsi offloaded tcp connection management * - * Written by Dimitris Michailidis (dm@xxxxxxxxxxx) + * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this * release for licensing terms and conditions. + * + * Written by: Dimitris Michailidis (dm@xxxxxxxxxxx) + * Karen Xie (kxie@xxxxxxxxxxx) */ #include <linux/if_vlan.h> @@ -17,7 +20,29 @@ #include "firmware_exports.h" #include "cxgb3i_offload.h" #include "cxgb3i_ulp2.h" +#include "cxgb3i_ddp.h" + +#ifdef __DEBUG_C3CN_CONN__ +#define c3cn_conn_debug cxgb3i_log_info +#else +#define c3cn_conn_debug(fmt...) +#endif + +#ifdef __DEBUG_C3CN_TX__ +#define c3cn_tx_debug cxgb3i_log_debug +#else +#define c3cn_tx_debug(fmt...) +#endif + +#ifdef __DEBUG_C3CN_RX__ +#define c3cn_rx_debug cxgb3i_log_debug +#else +#define c3cn_rx_debug(fmt...) +#endif +/* + * module parameters related to offloaded iscsi connection + */ static int cxgb3_rcv_win = 256 * 1024; module_param(cxgb3_rcv_win, int, 0644); MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)"); @@ -39,24 +64,91 @@ static unsigned int cxgb3_sport_base = 20000; module_param(cxgb3_sport_base, uint, 0644); MODULE_PARM_DESC(cxgb3_sport_base, "starting port number (default=20000)"); -#ifdef __DEBUG_C3CN_TX__ -#define c3cn_tx_debug cxgb3i_log_debug -#else -#define c3cn_tx_debug(fmt...) -#endif +/* + * cxgb3i tcp connection data(per adapter) list + */ +static LIST_HEAD(cdata_list); +static DEFINE_RWLOCK(cdata_rwlock); + +static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion); +static void c3cn_release_offload_resources(struct s3_conn *c3cn); + +/* + * iscsi source port management + * + * Find a free source port in the port allocation map. 
We use a very simple + * rotor scheme to look for the next free port. + * + * If a source port has been specified make sure that it doesn't collide with + * our normal source port allocation map. If it's outside the range of our + * allocation/deallocation scheme just let them use it. + * + * If the source port is outside our allocation range, the caller is + * responsible for keeping track of their port usage. + */ +static int c3cn_get_port(struct s3_conn *c3cn, struct cxgb3i_sdev_data *cdata) +{ + unsigned int start; + int idx; + + if (!cdata) + goto error_out; + + if (c3cn->saddr.sin_port != 0) { + idx = ntohs(c3cn->saddr.sin_port) - cxgb3_sport_base; + if (idx < 0 || idx >= cxgb3_max_connect) + return 0; + return test_and_set_bit(idx, cdata->sport_map) ? + -EADDRINUSE : 0; + } + + /* the sport_map_next may not be accurate but that is okay, sport_map + should be */ + start = idx = cdata->sport_map_next; + do { + if (++idx >= cxgb3_max_connect) + idx = 0; + if (!(test_and_set_bit(idx, cdata->sport_map))) { + c3cn->saddr.sin_port = htons(cxgb3_sport_base + idx); + cdata->sport_map_next = idx; + c3cn_conn_debug("%s reserve port %u.\n", + cdata->cdev->name, + cxgb3_sport_base + idx); + return 0; + } + } while (idx != start); + +error_out: + return -EADDRNOTAVAIL; +} + +static void c3cn_put_port(struct s3_conn *c3cn) +{ + struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(c3cn->cdev); + + if (c3cn->saddr.sin_port) { + int idx = ntohs(c3cn->saddr.sin_port) - cxgb3_sport_base; + + c3cn->saddr.sin_port = 0; + if (idx < 0 || idx >= cxgb3_max_connect) + return; + clear_bit(idx, cdata->sport_map); + c3cn_conn_debug("%s, release port %u.\n", + cdata->cdev->name, cxgb3_sport_base + idx); + } +} -/* connection flags */ static inline void c3cn_set_flag(struct s3_conn *c3cn, enum c3cn_flags flag) { __set_bit(flag, &c3cn->flags); - c3cn_conn_debug("c3cn 0x%p, set %d, s 0x%x, f 0x%lx.\n", + c3cn_conn_debug("c3cn 0x%p, set %d, s %u, f 0x%lx.\n", c3cn, flag, c3cn->state, 
c3cn->flags); } -static inline void c3cn_reset_flag(struct s3_conn *c3cn, enum c3cn_flags flag) +static inline void c3cn_clear_flag(struct s3_conn *c3cn, enum c3cn_flags flag) { __clear_bit(flag, &c3cn->flags); - c3cn_conn_debug("c3cn 0x%p, clear %d, s 0x%x, f 0x%lx.\n", + c3cn_conn_debug("c3cn 0x%p, clear %d, s %u, f 0x%lx.\n", c3cn, flag, c3cn->state, c3cn->flags); } @@ -67,14 +159,12 @@ static inline int c3cn_flag(struct s3_conn *c3cn, enum c3cn_flags flag) { return test_bit(flag, &c3cn->flags); } -/* connection state */ static void c3cn_set_state(struct s3_conn *c3cn, int state) { - c3cn_conn_debug("c3cn 0x%p state -> 0x%x.\n", c3cn, state); + c3cn_conn_debug("c3cn 0x%p state -> %u.\n", c3cn, state); c3cn->state = state; } -/* connection reference count */ static inline void c3cn_hold(struct s3_conn *c3cn) { atomic_inc(&c3cn->refcnt); @@ -83,431 +173,316 @@ static inline void c3cn_hold(struct s3_conn *c3cn) static inline void c3cn_put(struct s3_conn *c3cn) { if (atomic_dec_and_test(&c3cn->refcnt)) { - c3cn_conn_debug("free c3cn 0x%p, 0x%x, 0x%lx.\n", + c3cn_conn_debug("free c3cn 0x%p, s %u, f 0x%lx.\n", c3cn, c3cn->state, c3cn->flags); kfree(c3cn); } } -/* minimal port allocation management scheme */ -static spinlock_t sport_map_lock; -static unsigned int sport_map_next; -static unsigned long *sport_map; +static void c3cn_closed(struct s3_conn *c3cn) +{ + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + c3cn_put_port(c3cn); + c3cn_release_offload_resources(c3cn); + c3cn_set_state(c3cn, C3CN_STATE_CLOSED); + cxgb3i_conn_closing(c3cn); +} + +/* + * CPL (Chelsio Protocol Language) defines a message passing interface between + * the host driver and T3 asic. + * The section below implements CPLs related to iscsi tcp connection + * open/close/abort and data send/receive. + */ /* - * Find a free source port in our allocation map. We use a very simple rotor - * scheme to look for the next free port. 
- * - * If a source port has been specified make sure that it doesn't collide with - * our normal source port allocation map. If it's outside the range of our - * allocation scheme just let them use it. + * CPL connection active open request: host -> */ -static int c3cn_get_port(struct s3_conn *c3cn) +static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) { - unsigned int start; + int i = 0; - if (!sport_map) - goto error_out; + while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) + ++i; + return i; +} - if (c3cn->saddr.sin_port != 0) { - int sport = ntohs(c3cn->saddr.sin_port) - cxgb3_sport_base; - int err = 0; +static unsigned int select_mss(struct s3_conn *c3cn, unsigned int pmtu) +{ + unsigned int idx; + struct dst_entry *dst = c3cn->dst_cache; + struct t3cdev *cdev = c3cn->cdev; + const struct t3c_data *td = T3C_DATA(cdev); + u16 advmss = dst_metric(dst, RTAX_ADVMSS); - if (sport < 0 || sport >= cxgb3_max_connect) - return 0; - spin_lock(&sport_map_lock); - err = __test_and_set_bit(sport, sport_map); - spin_unlock(&sport_map_lock); - return err ? 
-EADDRINUSE : 0; - } + if (advmss > pmtu - 40) + advmss = pmtu - 40; + if (advmss < td->mtus[0] - 40) + advmss = td->mtus[0] - 40; + idx = find_best_mtu(td, advmss + 40); + return idx; +} - spin_lock(&sport_map_lock); - start = sport_map_next; - do { - unsigned int new = sport_map_next; - if (++sport_map_next >= cxgb3_max_connect) - sport_map_next = 0; - if (!(__test_and_set_bit(new, sport_map))) { - spin_unlock(&sport_map_lock); - c3cn_conn_debug("reserve port %u.\n", - cxgb3_sport_base + new); - c3cn->saddr.sin_port = htons(cxgb3_sport_base + new); - return 0; - } - } while (sport_map_next != start); - spin_unlock(&sport_map_lock); +static inline int compute_wscale(int win) +{ + int wscale = 0; + while (wscale < 14 && (65535<<wscale) < win) + wscale++; + return wscale; +} -error_out: - return -EADDRNOTAVAIL; +static inline unsigned int calc_opt0h(struct s3_conn *c3cn) +{ + int wscale = compute_wscale(cxgb3_rcv_win); + return V_KEEP_ALIVE(1) | + F_TCAM_BYPASS | + V_WND_SCALE(wscale) | + V_MSS_IDX(c3cn->mss_idx); } -/* - * Deallocate a source port from the allocation map. If the source port is - * outside our allocation range just return -- the caller is responsible for - * keeping track of their port usage outside of our allocation map. 
- */ -static void c3cn_put_port(struct s3_conn *c3cn) +static inline unsigned int calc_opt0l(struct s3_conn *c3cn) { - if (c3cn->saddr.sin_port) { - int old = ntohs(c3cn->saddr.sin_port) - cxgb3_sport_base; - c3cn->saddr.sin_port = 0; + return V_ULP_MODE(ULP_MODE_ISCSI) | + V_RCV_BUFSIZ(cxgb3_rcv_win>>10); +} - if (old < 0 || old >= cxgb3_max_connect) - return; +static void make_act_open_req(struct s3_conn *c3cn, struct sk_buff *skb, + unsigned int atid, const struct l2t_entry *e) +{ + struct cpl_act_open_req *req; - c3cn_conn_debug("release port %u.\n", cxgb3_sport_base + old); - spin_lock(&sport_map_lock); - __clear_bit(old, sport_map); - spin_unlock(&sport_map_lock); - } + c3cn_conn_debug("c3cn 0x%p, atid 0x%x.\n", c3cn, atid); + + skb->priority = CPL_PRIORITY_SETUP; + req = (struct cpl_act_open_req *)__skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); + req->local_port = c3cn->saddr.sin_port; + req->peer_port = c3cn->daddr.sin_port; + req->local_ip = c3cn->saddr.sin_addr.s_addr; + req->peer_ip = c3cn->daddr.sin_addr.s_addr; + req->opt0h = htonl(calc_opt0h(c3cn) | V_L2T_IDX(e->idx) | + V_TX_CHANNEL(e->smt_idx)); + req->opt0l = htonl(calc_opt0l(c3cn)); + req->params = 0; } -static void c3cn_reset_timer(struct s3_conn *c3cn, struct timer_list *timer, - unsigned long expires) +static void fail_act_open(struct s3_conn *c3cn, int errno) { - if (!mod_timer(timer, expires)) - c3cn_hold(c3cn); + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + c3cn->err = errno; + c3cn_closed(c3cn); } -typedef int (cxgb3_cpl_handler_decl) (struct t3cdev *, - struct sk_buff *, void *); - -static cxgb3_cpl_handler_decl do_act_establish; -static cxgb3_cpl_handler_decl do_act_open_rpl; -static cxgb3_cpl_handler_decl do_wr_ack; -static cxgb3_cpl_handler_decl do_peer_close; -static cxgb3_cpl_handler_decl do_abort_req; -static cxgb3_cpl_handler_decl 
do_abort_rpl; -static cxgb3_cpl_handler_decl do_close_con_rpl; -static cxgb3_cpl_handler_decl do_iscsi_hdr; +static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) +{ + struct s3_conn *c3cn = (struct s3_conn *)skb->sk; -static LIST_HEAD(cxgb3_list); -static DEFINE_MUTEX(cxgb3_list_lock); + c3cn_conn_debug("c3cn 0x%p, state %u.\n", c3cn, c3cn->state); -/* - * For ULP connections HW may inserts digest bytes into the pdu. This array - * contains the compensating extra lengths for ULP packets. It is indexed by - * a packet's ULP submode. - */ -static const unsigned int cxgb3_ulp_extra_len[] = { 0, 4, 4, 8 }; + c3cn_hold(c3cn); + spin_lock_bh(&c3cn->lock); + if (c3cn->state == C3CN_STATE_CONNECTING) + fail_act_open(c3cn, EHOSTUNREACH); + spin_unlock_bh(&c3cn->lock); + c3cn_put(c3cn); + __kfree_skb(skb); +} /* - * Return the length of any HW additions that will be made to a Tx packet. - * Such additions can happen for some types of ULP packets. + * CPL connection close request: host -> + * + * Close a connection by sending a CPL_CLOSE_CON_REQ message and queue it to + * the write queue (i.e., after any unsent txt data). */ -static inline unsigned int ulp_extra_len(const struct sk_buff *skb) +static void skb_entail(struct s3_conn *c3cn, struct sk_buff *skb, + int flags) { - return cxgb3_ulp_extra_len[skb_ulp_mode(skb) & 3]; + CXGB3_SKB_CB(skb)->seq = c3cn->write_seq; + CXGB3_SKB_CB(skb)->flags = flags; + __skb_queue_tail(&c3cn->write_queue, skb); } -/* - * Size of WRs in bytes. Note that we assume all devices we are handling have - * the same WR size. - */ -static unsigned int wrlen __read_mostly; - -/* - * The number of WRs needed for an skb depends on the number of page fragments - * in the skb and whether it has any payload in its main body. This maps the - * length of the gather list represented by an skb into the # of necessary WRs. 
- */ -static unsigned int skb_wrs[MAX_SKB_FRAGS + 2] __read_mostly; - -static void s3_init_wr_tab(unsigned int wr_len) +static void send_close_req(struct s3_conn *c3cn) { - int i; + struct sk_buff *skb = c3cn->cpl_close; + struct cpl_close_con_req *req = (struct cpl_close_con_req *)skb->head; + unsigned int tid = c3cn->tid; - if (skb_wrs[1]) /* already initialized */ - return; + c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); - for (i = 1; i < ARRAY_SIZE(skb_wrs); i++) { - int sgl_len = (3 * i) / 2 + (i & 1); + c3cn->cpl_close = NULL; - sgl_len += 3; - skb_wrs[i] = (sgl_len <= wr_len - ? 1 : 1 + (sgl_len - 2) / (wr_len - 1)); - } + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); + req->wr.wr_lo = htonl(V_WR_TID(tid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); + req->rsvd = htonl(c3cn->write_seq); - wrlen = wr_len * 8; + skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND); + if (c3cn->state != C3CN_STATE_CONNECTING) + c3cn_push_tx_frames(c3cn, 1); } /* - * cxgb3i API operations. - */ -/* - * large memory chunk allocation/release + * CPL connection abort request: host -> + * + * Send an ABORT_REQ message. Makes sure we do not send multiple ABORT_REQs + * for the same connection and also that we do not try to send a message + * after the connection has closed. 
*/ -void *cxgb3i_alloc_big_mem(unsigned int size) +static void abort_arp_failure(struct t3cdev *cdev, struct sk_buff *skb) { - void *p = kmalloc(size, GFP_KERNEL); - if (!p) - p = vmalloc(size); - if (p) - memset(p, 0, size); - return p; -} + struct cpl_abort_req *req = cplhdr(skb); -void cxgb3i_free_big_mem(void *addr) -{ - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); -} + c3cn_conn_debug("tdev 0x%p.\n", cdev); -void cxgb3i_sdev_cleanup(void) -{ - if (sport_map) - cxgb3i_free_big_mem(sport_map); + req->cmd = CPL_ABORT_NO_RST; + cxgb3_ofld_send(cdev, skb); } -int cxgb3i_sdev_init(cxgb3_cpl_handler_func *cpl_handlers) +static inline void c3cn_purge_write_queue(struct s3_conn *c3cn) { - cpl_handlers[CPL_ACT_ESTABLISH] = do_act_establish; - cpl_handlers[CPL_ACT_OPEN_RPL] = do_act_open_rpl; - cpl_handlers[CPL_PEER_CLOSE] = do_peer_close; - cpl_handlers[CPL_ABORT_REQ_RSS] = do_abort_req; - cpl_handlers[CPL_ABORT_RPL_RSS] = do_abort_rpl; - cpl_handlers[CPL_CLOSE_CON_RPL] = do_close_con_rpl; - cpl_handlers[CPL_TX_DMA_ACK] = do_wr_ack; - cpl_handlers[CPL_ISCSI_HDR] = do_iscsi_hdr; + struct sk_buff *skb; - if (cxgb3_max_connect > CXGB3I_MAX_CONN) - cxgb3_max_connect = CXGB3I_MAX_CONN; - sport_map = cxgb3i_alloc_big_mem(DIV_ROUND_UP(cxgb3_max_connect, - 8 * - sizeof(unsigned long))); - if (!sport_map) - return -ENOMEM; - return 0; + while ((skb = __skb_dequeue(&c3cn->write_queue))) + __kfree_skb(skb); } -void cxgb3i_sdev_add(struct t3cdev *cdev, struct cxgb3_client *client) +static void send_abort_req(struct s3_conn *c3cn) { - struct cxgb3i_sdev_data *cdata; - struct adap_ports *ports; - struct ofld_page_info rx_page_info; - unsigned int wr_len; - int i; + struct sk_buff *skb = c3cn->cpl_abort_req; + struct cpl_abort_req *req; + unsigned int tid = c3cn->tid; - cdata = kzalloc(sizeof *cdata, GFP_KERNEL); - if (!cdata) + if (unlikely(c3cn->state == C3CN_STATE_ABORTING) || !skb || + !c3cn->cdev) return; - ports = kzalloc(sizeof *ports, GFP_KERNEL); - if 
(!ports) - goto free_ports; - cdata->ports = ports; - if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0 || - cdev->ctl(cdev, GET_PORTS, cdata->ports) < 0 || - cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info) < 0) - goto free_ports; + c3cn_set_state(c3cn, C3CN_STATE_ABORTING); - s3_init_wr_tab(wr_len); + c3cn_conn_debug("c3cn 0x%p, flag ABORT_RPL + ABORT_SHUT.\n", c3cn); - INIT_LIST_HEAD(&cdata->list); - cdata->cdev = cdev; - cdata->client = client; - cdata->rx_page_size = rx_page_info.page_size; - skb_queue_head_init(&cdata->deferq); + c3cn_set_flag(c3cn, C3CN_ABORT_RPL_PENDING); - for (i = 0; i < ports->nports; i++) - NDEV2CDATA(ports->lldevs[i]) = cdata; + /* Purge the send queue so we don't send anything after an abort. */ + c3cn_purge_write_queue(c3cn); - mutex_lock(&cxgb3_list_lock); - list_add_tail(&cdata->list, &cxgb3_list); - mutex_unlock(&cxgb3_list_lock); + c3cn->cpl_abort_req = NULL; + req = (struct cpl_abort_req *)skb->head; - return; + skb->priority = CPL_PRIORITY_DATA; + set_arp_failure_handler(skb, abort_arp_failure); + + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); + req->wr.wr_lo = htonl(V_WR_TID(tid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + req->rsvd0 = htonl(c3cn->snd_nxt); + req->rsvd1 = !c3cn_flag(c3cn, C3CN_TX_DATA_SENT); + req->cmd = CPL_ABORT_SEND_RST; -free_ports: - kfree(ports); - kfree(cdata); + l2t_send(c3cn->cdev, skb, c3cn->l2t); } -void cxgb3i_sdev_remove(struct t3cdev *cdev) +/* + * CPL connection abort reply: host -> + * + * Send an ABORT_RPL message in response of the ABORT_REQ received. 
+ */ +static void send_abort_rpl(struct s3_conn *c3cn, int rst_status) { - struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev); - struct adap_ports *ports = cdata->ports; - int i; + struct sk_buff *skb = c3cn->cpl_abort_rpl; + struct cpl_abort_rpl *rpl = (struct cpl_abort_rpl *)skb->head; - for (i = 0; i < ports->nports; i++) - NDEV2CDATA(ports->lldevs[i]) = NULL; + c3cn->cpl_abort_rpl = NULL; - mutex_lock(&cxgb3_list_lock); - list_del(&cdata->list); - mutex_unlock(&cxgb3_list_lock); + skb->priority = CPL_PRIORITY_DATA; + rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); + rpl->wr.wr_lo = htonl(V_WR_TID(c3cn->tid)); + OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, c3cn->tid)); + rpl->cmd = rst_status; - kfree(ports); - kfree(cdata); + cxgb3_ofld_send(c3cn->cdev, skb); } /* - * Return TRUE if the specified net device is for a port on one of our - * registered adapters. + * CPL connection rx data ack: host -> + * Send RX credits through an RX_DATA_ACK CPL message. Returns the number of + * credits sent. 
*/ -static int is_cxgb3_dev(struct net_device *dev) +static u32 send_rx_credits(struct s3_conn *c3cn, u32 credits, u32 dack) { - struct cxgb3i_sdev_data *cdata; + struct sk_buff *skb; + struct cpl_rx_data_ack *req; - mutex_lock(&cxgb3_list_lock); - list_for_each_entry(cdata, &cxgb3_list, list) { - struct adap_ports *ports = cdata->ports; - int i; + skb = alloc_skb(sizeof(*req), GFP_ATOMIC); + if (!skb) + return 0; - for (i = 0; i < ports->nports; i++) - if (dev == ports->lldevs[i]) { - mutex_unlock(&cxgb3_list_lock); - return 1; - } - } - mutex_unlock(&cxgb3_list_lock); - return 0; + req = (struct cpl_rx_data_ack *)__skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, c3cn->tid)); + req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); + skb->priority = CPL_PRIORITY_ACK; + cxgb3_ofld_send(c3cn->cdev, skb); + return credits; } /* - * Primary cxgb3 API operations. - * ============================= + * CPL connection tx data: host -> + * + * Send iscsi PDU via TX_DATA CPL message. Returns the number of + * credits sent. + * Each TX_DATA consumes work request credit (wrs), so we need to keep track of + * how many we've used so far and how many are pending (i.e., yet ack'ed by T3). 
*/ -static int s3_push_frames(struct s3_conn *, int); -static int s3_send_reset(struct s3_conn *, int, struct sk_buff *); -static void t3_release_offload_resources(struct s3_conn *); -static void mk_close_req(struct s3_conn *); - -struct s3_conn *cxgb3i_c3cn_create(void) -{ - struct s3_conn *c3cn; - - c3cn = kzalloc(sizeof(*c3cn), GFP_KERNEL); - if (c3cn == NULL) - return NULL; - - c3cn_conn_debug("alloc c3cn 0x%p.\n", c3cn); - - c3cn->flags = 0; - spin_lock_init(&c3cn->lock); - atomic_set(&c3cn->refcnt, 1); - skb_queue_head_init(&c3cn->receive_queue); - skb_queue_head_init(&c3cn->write_queue); - setup_timer(&c3cn->retry_timer, NULL, (unsigned long)c3cn); - rwlock_init(&c3cn->callback_lock); - - return c3cn; -} - -static inline void s3_purge_write_queue(struct s3_conn *c3cn) +/* + * For ULP connections HW may inserts digest bytes into the pdu. Those digest + * bytes are not sent by the host but are part of the TCP payload and therefore + * consume TCP sequence space. + */ +static const unsigned int cxgb3_ulp_extra_len[] = { 0, 4, 4, 8 }; +static inline unsigned int ulp_extra_len(const struct sk_buff *skb) { - struct sk_buff *skb; - - while ((skb = __skb_dequeue(&c3cn->write_queue))) - __kfree_skb(skb); + return cxgb3_ulp_extra_len[skb_ulp_mode(skb) & 3]; } -static void c3cn_done(struct s3_conn *c3cn) -{ - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); +static unsigned int wrlen __read_mostly; - c3cn_put_port(c3cn); - t3_release_offload_resources(c3cn); - c3cn_set_state(c3cn, C3CN_STATE_CLOSE); - c3cn->shutdown = C3CN_SHUTDOWN_MASK; - cxgb3i_conn_closing(c3cn); -} +/* + * The number of WRs needed for an skb depends on the number of fragments + * in the skb and whether it has any payload in its main body. This maps the + * length of the gather list represented by an skb into the # of necessary WRs. + * + * The max. length of an skb is controlled by the max pdu size which is ~16K. + * Also, assume the min. 
fragment length is the sector size (512), then add + * extra fragment counts for iscsi bhs and payload padding. + */ +#define SKB_WR_LIST_SIZE (16384/512 + 3) +static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly; -static void c3cn_close(struct s3_conn *c3cn) +static void s3_init_wr_tab(unsigned int wr_len) { - int data_lost, old_state; - - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); - - dst_confirm(c3cn->dst_cache); + int i; - spin_lock_bh(&c3cn->lock); - c3cn->shutdown |= C3CN_SHUTDOWN_MASK; + if (skb_wrs[1]) /* already initialized */ + return; - /* - * We need to flush the receive buffs. We do this only on the - * descriptor close, not protocol-sourced closes, because the - * reader process may not have drained the data yet! Make a note - * of whether any received data will be lost so we can decide whether - * to FIN or RST. - */ - data_lost = skb_queue_len(&c3cn->receive_queue); - __skb_queue_purge(&c3cn->receive_queue); + for (i = 1; i < SKB_WR_LIST_SIZE; i++) { + int sgl_len = (3 * i) / 2 + (i & 1); - if (c3cn->state == C3CN_STATE_CLOSE) - /* Nothing if we are already closed */ - c3cn_conn_debug("c3cn 0x%p, 0x%x, already closed.\n", - c3cn, c3cn->state); - else if (data_lost || c3cn->state == C3CN_STATE_SYN_SENT) { - c3cn_conn_debug("c3cn 0x%p, 0x%x -> closing, send reset.\n", - c3cn, c3cn->state); - /* Unread data was tossed, zap the connection. */ - s3_send_reset(c3cn, CPL_ABORT_SEND_RST, NULL); - goto unlock; - } else if (c3cn->state == C3CN_STATE_ESTABLISHED) { - c3cn_conn_debug("c3cn 0x%p, est. -> closing, send close_req.\n", - c3cn); - c3cn_set_state(c3cn, C3CN_STATE_CLOSING); - mk_close_req(c3cn); + sgl_len += 3; + skb_wrs[i] = (sgl_len <= wr_len + ? 1 : 1 + (sgl_len - 2) / (wr_len - 1)); } -unlock: - old_state = c3cn->state; - c3cn_hold(c3cn); /* must last past the potential destroy() */ - - spin_unlock_bh(&c3cn->lock); - - /* - * There are no more user references at this point. 
Grab the - * connection lock and finish the close. - */ - local_bh_disable(); - spin_lock(&c3cn->lock); - - /* - * Because the connection was orphaned before the spin_lock() - * either the backlog or a BH may have already destroyed it. - * Bail out if so. - */ - if (old_state != C3CN_STATE_CLOSE && c3cn->state == C3CN_STATE_CLOSE) - goto out; - - if (c3cn->state == C3CN_STATE_CLOSE) - s3_purge_write_queue(c3cn); - -out: - spin_unlock(&c3cn->lock); - local_bh_enable(); - c3cn_put(c3cn); -} - -void cxgb3i_c3cn_release(struct s3_conn *c3cn) -{ - c3cn_conn_debug("c3cn 0x%p, s 0x%x, f 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); - if (likely(c3cn->state != C3CN_STATE_SYN_SENT)) - c3cn_close(c3cn); - else - c3cn_set_flag(c3cn, C3CN_CLOSE_NEEDED); - c3cn_put(c3cn); + wrlen = wr_len * 8; } - -/* - * Local utility routines used to implement primary cxgb3 API operations. - * ====================================================================== - */ - -static u32 s3_send_rx_credits(struct s3_conn *, u32, u32, int); -static int act_open(struct s3_conn *, struct net_device *); -static void mk_act_open_req(struct s3_conn *, struct sk_buff *, - unsigned int, const struct l2t_entry *); -static void skb_entail(struct s3_conn *, struct sk_buff *, int); - static inline void reset_wr_list(struct s3_conn *c3cn) { c3cn->wr_pending_head = NULL; @@ -525,7 +500,7 @@ static inline void enqueue_wr(struct s3_conn *c3cn, /* * We want to take an extra reference since both us and the driver - * need to free the packet before it's really freed. We know there's + * need to free the packet before it's really freed. We know there's * just one user currently so we use atomic_set rather than skb_get * to avoid the atomic op. */ @@ -538,34 +513,37 @@ static inline void enqueue_wr(struct s3_conn *c3cn, c3cn->wr_pending_tail = skb; } -/* - * The next two functions calculate the option 0 value for a connection. 
- */ -static inline int compute_wscale(int win) +static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn) { - int wscale = 0; - while (wscale < 14 && (65535<<wscale) < win) - wscale++; - return wscale; + return c3cn->wr_pending_head; } -static inline unsigned int calc_opt0h(struct s3_conn *c3cn) +static inline void free_wr_skb(struct sk_buff *skb) { - int wscale = compute_wscale(cxgb3_rcv_win); - return V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(c3cn->mss_idx); + kfree_skb(skb); } -static inline unsigned int calc_opt0l(struct s3_conn *c3cn) +static inline struct sk_buff *dequeue_wr(struct s3_conn *c3cn) { - return V_ULP_MODE(ULP_MODE_ISCSI) | - V_RCV_BUFSIZ(cxgb3_rcv_win>>10); + struct sk_buff *skb = c3cn->wr_pending_head; + + if (likely(skb)) { + /* Don't bother clearing the tail */ + c3cn->wr_pending_head = (struct sk_buff *)skb->sp; + skb->sp = NULL; + } + return skb; } -static inline void make_tx_data_wr(struct s3_conn *c3cn, - struct sk_buff *skb, int len) +static void purge_wr_queue(struct s3_conn *c3cn) +{ + struct sk_buff *skb; + while ((skb = dequeue_wr(c3cn)) != NULL) + free_wr_skb(skb); +} + +static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb, + int len) { struct tx_data_wr *req; @@ -582,242 +560,49 @@ static inline void make_tx_data_wr(struct s3_conn *c3cn, V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1))); if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) { - req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | V_TX_CPU_IDX(c3cn->qset)); - - /* Sendbuffer is in units of 32KB. - */ + /* Sendbuffer is in units of 32KB. 
*/ req->param |= htonl(V_TX_SNDBUF(cxgb3_snd_win >> 15)); c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT); } } /** - * cxgb3_egress_dev - return the cxgb3 egress device - * @root_dev: the root device anchoring the search - * @c3cn: the connection used to determine egress port in bonding mode - * @context: in bonding mode, indicates a connection set up or failover + * c3cn_push_tx_frames -- start transmit + * @c3cn: the offloaded connection + * @req_completion: request wr_ack or not * - * Return egress device or NULL if the egress device isn't one of our ports. - * - * Given a root network device it returns the physical egress device that is a - * descendant of the root device. The root device may be either a physical - * device, in which case it is the device returned, or a virtual device, such - * as a VLAN or bonding device. In case of a bonding device the search - * considers the decisions of the bonding device given its mode to locate the - * correct egress device. - */ -static struct net_device *cxgb3_egress_dev(struct net_device *root_dev, - struct s3_conn *c3cn, - int context) -{ - while (root_dev) { - if (root_dev->priv_flags & IFF_802_1Q_VLAN) - root_dev = vlan_dev_real_dev(root_dev); - else if (is_cxgb3_dev(root_dev)) - return root_dev; - else - return NULL; - } - return NULL; -} - -static struct rtable *find_route(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - struct rtable *rt; - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = saddr, - .tos = 0 } }, - .proto = IPPROTO_TCP, - .uli_u = { - .ports = { - .sport = sport, - .dport = dport } } }; - - if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) - return NULL; - return rt; -} - -int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin) -{ - struct rtable *rt; - struct net_device *dev; - struct cxgb3i_sdev_data *cdata; - struct t3cdev *cdev; - __be32 sipv4; - int err; - - if (usin->sin_family != AF_INET) - return -EAFNOSUPPORT; - - /* get a 
source port if one hasn't been provided */ - err = c3cn_get_port(c3cn); - if (err) - return err; - - c3cn_conn_debug("c3cn 0x%p get port %u.\n", - c3cn, ntohs(c3cn->saddr.sin_port)); - - c3cn->daddr.sin_port = usin->sin_port; - c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr; - - rt = find_route(c3cn->saddr.sin_addr.s_addr, - c3cn->daddr.sin_addr.s_addr, - c3cn->saddr.sin_port, - c3cn->daddr.sin_port); - if (rt == NULL) { - c3cn_conn_debug("NO route to 0x%x, port %u.\n", - c3cn->daddr.sin_addr.s_addr, - ntohs(c3cn->daddr.sin_port)); - return -ENETUNREACH; - } - - if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { - c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n", - c3cn->daddr.sin_addr.s_addr, - ntohs(c3cn->daddr.sin_port)); - ip_rt_put(rt); - return -ENETUNREACH; - } - - if (!c3cn->saddr.sin_addr.s_addr) - c3cn->saddr.sin_addr.s_addr = rt->rt_src; - - /* now commit destination to connection */ - c3cn->dst_cache = &rt->u.dst; - - /* try to establish an offloaded connection */ - dev = cxgb3_egress_dev(c3cn->dst_cache->dev, c3cn, 0); - if (dev == NULL) { - c3cn_conn_debug("c3cn 0x%p, egress dev NULL.\n", c3cn); - return -ENETUNREACH; - } - cdata = NDEV2CDATA(dev); - cdev = cdata->cdev; - - sipv4 = cxgb3i_get_private_ipv4addr(dev); - if (!sipv4) { - c3cn_conn_debug("c3cn 0x%p, iscsi ip not configured.\n", c3cn); - sipv4 = c3cn->saddr.sin_addr.s_addr; - cxgb3i_set_private_ipv4addr(dev, sipv4); - } else - c3cn->saddr.sin_addr.s_addr = sipv4; - - c3cn_conn_debug("c3cn 0x%p, %u.%u.%u.%u,%u-%u.%u.%u.%u,%u SYN_SENT.\n", - c3cn, NIPQUAD(c3cn->saddr.sin_addr.s_addr), - ntohs(c3cn->saddr.sin_port), - NIPQUAD(c3cn->daddr.sin_addr.s_addr), - ntohs(c3cn->daddr.sin_port)); - - c3cn_set_state(c3cn, C3CN_STATE_SYN_SENT); - - if (!act_open(c3cn, dev)) - return 0; - - /* - * If we get here, we don't have an offload connection so simply - * return a failure. - */ - err = -ENOTSUPP; - - /* - * This trashes the connection and releases the local port, - * if necessary. 
- */ - c3cn_conn_debug("c3cn 0x%p -> CLOSE.\n", c3cn); - c3cn_set_state(c3cn, C3CN_STATE_CLOSE); - ip_rt_put(rt); - c3cn_put_port(c3cn); - c3cn->daddr.sin_port = 0; - return err; -} - -/* - * Set of states for which we should return RX credits. - */ -#define CREDIT_RETURN_STATE (C3CN_STATE_ESTABLISHED) - -/* - * Called after some received data has been read. It returns RX credits - * to the HW for the amount of data processed. - */ -void cxgb3i_c3cn_rx_credits(struct s3_conn *c3cn, int copied) -{ - struct t3cdev *cdev; - int must_send; - u32 credits, dack = 0; - - if (!c3cn_in_state(c3cn, CREDIT_RETURN_STATE)) - return; - - credits = c3cn->copied_seq - c3cn->rcv_wup; - if (unlikely(!credits)) - return; - - cdev = c3cn->cdev; - - if (unlikely(cxgb3_rx_credit_thres == 0)) - return; - - dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); - - /* - * For coalescing to work effectively ensure the receive window has - * at least 16KB left. - */ - must_send = credits + 16384 >= cxgb3_rcv_win; - - if (must_send || credits >= cxgb3_rx_credit_thres) - c3cn->rcv_wup += s3_send_rx_credits(c3cn, credits, dack, - must_send); -} - -/* - * Generic ARP failure handler that discards the buffer. + * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a + * connection's send queue and sends them on to T3. Must be called with the + * connection's lock held. Returns the amount of send buffer space that was + * freed as a result of sending queued data to T3. */ static void arp_failure_discard(struct t3cdev *cdev, struct sk_buff *skb) { kfree_skb(skb); } -/* - * Prepends TX_DATA_WR or CPL_CLOSE_CON_REQ headers to buffers waiting in a - * connection's send queue and sends them on to T3. Must be called with the - * connection's lock held. Returns the amount of send buffer space that was - * freed as a result of sending queued data to T3. 
- */ -static int s3_push_frames(struct s3_conn *c3cn, int req_completion) +static int c3cn_push_tx_frames(struct s3_conn *c3cn, int req_completion) { int total_size = 0; struct sk_buff *skb; struct t3cdev *cdev; struct cxgb3i_sdev_data *cdata; - if (unlikely(c3cn_in_state(c3cn, - C3CN_STATE_SYN_SENT | C3CN_STATE_CLOSE))) - return 0; - - /* - * We shouldn't really be called at all after an abort but check just - * in case. - */ - if (unlikely(c3cn_flag(c3cn, C3CN_ABORT_SHUTDOWN))) + if (unlikely(c3cn->state == C3CN_STATE_CONNECTING || + c3cn->state == C3CN_STATE_CLOSE_WAIT_1 || + c3cn->state == C3CN_STATE_ABORTING)) { + c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n", + c3cn, c3cn->state); return 0; + } cdev = c3cn->cdev; cdata = CXGB3_SDEV_DATA(cdev); while (c3cn->wr_avail - && (skb = skb_peek(&c3cn->write_queue)) != NULL - && !c3cn_flag(c3cn, C3CN_TX_WAIT_IDLE)) { - + && (skb = skb_peek(&c3cn->write_queue)) != NULL) { int len = skb->len; /* length before skb_push */ int frags = skb_shinfo(skb)->nr_frags + (len != skb->data_len); int wrs_needed = skb_wrs[frags]; @@ -825,10 +610,15 @@ static int s3_push_frames(struct s3_conn *c3cn, int req_completion) if (wrs_needed > 1 && len + sizeof(struct tx_data_wr) <= wrlen) wrs_needed = 1; - WARN_ON(frags >= ARRAY_SIZE(skb_wrs) || wrs_needed < 1); + WARN_ON(frags >= SKB_WR_LIST_SIZE || wrs_needed < 1); - if (c3cn->wr_avail < wrs_needed) + if (c3cn->wr_avail < wrs_needed) { + c3cn_tx_debug("c3cn 0x%p, skb len %u/%u, frag %u, " + "wr %d < %u.\n", + c3cn, skb->len, skb->data_len, frags, + wrs_needed, c3cn->wr_avail); break; + } __skb_unlink(skb, &c3cn->write_queue); skb->priority = CPL_PRIORITY_DATA; @@ -851,8 +641,7 @@ static int s3_push_frames(struct s3_conn *c3cn, int req_completion) c3cn->wr_unacked = 0; } CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR; - } else if (skb->data[0] == FW_WROPCODE_OFLD_CLOSE_CON) - c3cn_set_flag(c3cn, C3CN_CLOSE_CON_REQUESTED); + } total_size += skb->truesize; set_arp_failure_handler(skb, 
arp_failure_discard); @@ -862,487 +651,345 @@ static int s3_push_frames(struct s3_conn *c3cn, int req_completion) } /* - * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant - * and send it along. - */ -static void abort_arp_failure(struct t3cdev *cdev, struct sk_buff *skb) -{ - struct cpl_abort_req *req = cplhdr(skb); - - c3cn_conn_debug("tdev 0x%p.\n", cdev); - - req->cmd = CPL_ABORT_NO_RST; - cxgb3_ofld_send(cdev, skb); -} - -/* - * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do - * not send multiple ABORT_REQs for the same connection and also that we do - * not try to send a message after the connection has closed. Returns 1 if - * an ABORT_REQ wasn't generated after all, 0 otherwise. + * process_cpl_msg: -> host + * Top-level CPL message processing used by most CPL messages that + * pertain to connections. */ -static int s3_send_reset(struct s3_conn *c3cn, int mode, - struct sk_buff *skb) -{ - struct cpl_abort_req *req; - unsigned int tid = c3cn->tid; - - if (unlikely(c3cn_flag(c3cn, C3CN_ABORT_SHUTDOWN) || !c3cn->cdev)) { - if (skb) - __kfree_skb(skb); - return 1; - } - - c3cn_conn_debug("c3cn 0x%p, mode %d, flag ABORT_RPL + ABORT_SHUT.\n", - c3cn, mode); - - c3cn_set_flag(c3cn, C3CN_ABORT_RPL_PENDING); - c3cn_set_flag(c3cn, C3CN_ABORT_SHUTDOWN); - - /* Purge the send queue so we don't send anything after an abort. 
*/ - s3_purge_write_queue(c3cn); - - if (!skb) - skb = alloc_skb(sizeof(*req), GFP_KERNEL | __GFP_NOFAIL); - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, abort_arp_failure); - - req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); - req->wr.wr_lo = htonl(V_WR_TID(tid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); - req->rsvd0 = htonl(c3cn->snd_nxt); - req->rsvd1 = !c3cn_flag(c3cn, C3CN_TX_DATA_SENT); - req->cmd = mode; - - l2t_send(c3cn->cdev, skb, c3cn->l2t); - return 0; -} - -/* - * Add a list of skbs to a connection send queue. This interface is intended - * for use by in-kernel ULPs. The skbs must comply with the max size limit of - * the device and have a headroom of at least TX_HEADER_LEN bytes. - */ -int cxgb3i_c3cn_send_pdus(struct s3_conn *c3cn, struct sk_buff *skb, int flags) +static inline void process_cpl_msg(void (*fn)(struct s3_conn *, + struct sk_buff *), + struct s3_conn *c3cn, + struct sk_buff *skb) { - struct sk_buff *next; - int err, copied = 0; - spin_lock_bh(&c3cn->lock); - - if (!c3cn_in_state(c3cn, C3CN_STATE_ESTABLISHED)) { - err = -EAGAIN; - goto out_err; - } - - err = -EPIPE; - if (c3cn->err || (c3cn->shutdown & C3CN_SEND_SHUTDOWN)) - goto out_err; - - while (skb) { - if (unlikely(skb_headroom(skb) < TX_HEADER_LEN)) { - c3cn_tx_debug("c3cn 0x%p, skb head.\n", c3cn); - err = -EINVAL; - goto out_err; - } - - next = skb->next; - skb->next = NULL; - skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND | C3CB_FLAG_NEED_HDR); - copied += skb->len; - c3cn->write_seq += skb->len + ulp_extra_len(skb); - skb = next; - } -done: - if (likely(skb_queue_len(&c3cn->write_queue))) - s3_push_frames(c3cn, 1); + fn(c3cn, skb); spin_unlock_bh(&c3cn->lock); - return copied; - -out_err: - if (copied == 0 && err == -EPIPE) - copied = c3cn->err ? c3cn->err : -EPIPE; - goto done; } /* - * Low-level utility routines for primary API functions. 
- * ===================================================== - */ -/* routines to implement CPL message processing */ -static void c3cn_act_establish(struct s3_conn *, struct sk_buff *); -static void active_open_failed(struct s3_conn *, struct sk_buff *); -static void wr_ack(struct s3_conn *, struct sk_buff *); -static void do_peer_fin(struct s3_conn *, struct sk_buff *); -static void process_abort_req(struct s3_conn *, struct sk_buff *); -static void process_abort_rpl(struct s3_conn *, struct sk_buff *); -static void process_close_con_rpl(struct s3_conn *, struct sk_buff *); -static void process_rx_iscsi_hdr(struct s3_conn *, struct sk_buff *); - -static struct sk_buff *__get_cpl_reply_skb(struct sk_buff *, size_t, gfp_t); - -static void fail_act_open(struct s3_conn *, int); -static void init_offload_conn(struct s3_conn *, struct t3cdev *, - struct dst_entry *); - -/* - * Insert a connection into the TID table and take an extra reference. + * process_cpl_msg_ref: -> host + * Similar to process_cpl_msg() but takes an extra connection reference around + * the call to the handler. Should be used if the handler may drop a + * connection reference. */ -static inline void c3cn_insert_tid(struct cxgb3i_sdev_data *cdata, - struct s3_conn *c3cn, - unsigned int tid) +static inline void process_cpl_msg_ref(void (*fn) (struct s3_conn *, + struct sk_buff *), + struct s3_conn *c3cn, + struct sk_buff *skb) { c3cn_hold(c3cn); - cxgb3_insert_tid(cdata->cdev, cdata->client, c3cn, tid); + process_cpl_msg(fn, c3cn, skb); + c3cn_put(c3cn); } -static inline void free_atid(struct t3cdev *cdev, unsigned int tid) +/* + * Process a CPL_ACT_ESTABLISH message: -> host + * Updates connection state from an active establish CPL message. Runs with + * the connection lock held. 
+ */ + +static inline void s3_free_atid(struct t3cdev *cdev, unsigned int tid) { struct s3_conn *c3cn = cxgb3_free_atid(cdev, tid); if (c3cn) c3cn_put(c3cn); } -/* - * This function is intended for allocations of small control messages. - * Such messages go as immediate data and usually the pakets are freed - * immediately. We maintain a cache of one small sk_buff and use it whenever - * it is available (has a user count of 1). Otherwise we get a fresh buffer. - */ -#define CTRL_SKB_LEN 120 - -static struct sk_buff *alloc_ctrl_skb(const struct s3_conn *c3cn, - int len) +static void c3cn_established(struct s3_conn *c3cn, u32 snd_isn, + unsigned int opt) { - struct sk_buff *skb = c3cn->ctrl_skb_cache; + c3cn_conn_debug("c3cn 0x%p, state %u.\n", c3cn, c3cn->state); - if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) { - __skb_trim(skb, 0); - atomic_set(&skb->users, 2); - } else if (likely(!in_atomic())) - skb = alloc_skb(len, GFP_ATOMIC | __GFP_NOFAIL); - else - skb = alloc_skb(len, GFP_ATOMIC); - return skb; -} + c3cn->write_seq = c3cn->snd_nxt = c3cn->snd_una = snd_isn; -/* - * Handle an ARP failure for an active open. - */ -static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - struct s3_conn *c3cn = (struct s3_conn *)skb->sk; + /* + * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't + * pass through opt0. + */ + if (cxgb3_rcv_win > (M_RCV_BUFSIZ << 10)) + c3cn->rcv_wup -= cxgb3_rcv_win - (M_RCV_BUFSIZ << 10); - c3cn_conn_debug("c3cn 0x%p, state 0x%x.\n", c3cn, c3cn->state); + dst_confirm(c3cn->dst_cache); - c3cn_hold(c3cn); - spin_lock(&c3cn->lock); - if (c3cn->state == C3CN_STATE_SYN_SENT) { - fail_act_open(c3cn, EHOSTUNREACH); - __kfree_skb(skb); - } - spin_unlock(&c3cn->lock); - c3cn_put(c3cn); + smp_mb(); + + c3cn_set_state(c3cn, C3CN_STATE_ESTABLISHED); } -/* - * Send an active open request. 
- */ -static int act_open(struct s3_conn *c3cn, struct net_device *dev) +static void process_act_establish(struct s3_conn *c3cn, struct sk_buff *skb) { - struct cxgb3i_sdev_data *cdata = NDEV2CDATA(dev); - struct t3cdev *cdev = cdata->cdev; - struct dst_entry *dst = c3cn->dst_cache; - struct sk_buff *skb; + struct cpl_act_establish *req = cplhdr(skb); + u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", c3cn, c3cn->state, c3cn->flags); - /* - * Initialize connection data. Note that the flags and ULP mode are - * initialized higher up ... - */ - c3cn->dev = dev; - c3cn->cdev = cdev; - c3cn->tid = cxgb3_alloc_atid(cdev, cdata->client, c3cn); - if (c3cn->tid < 0) - goto out_err; - - c3cn->qset = 0; - c3cn->l2t = t3_l2t_get(cdev, dst->neighbour, dev); - if (!c3cn->l2t) - goto free_tid; - skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_KERNEL); - if (!skb) - goto free_l2t; + if (unlikely(c3cn->state != C3CN_STATE_CONNECTING)) + cxgb3i_log_error("TID %u expected SYN_SENT, got EST., s %u\n", + c3cn->tid, c3cn->state); - skb->sk = (struct sock *)c3cn; - set_arp_failure_handler(skb, act_open_req_arp_failure); - - c3cn_hold(c3cn); - - init_offload_conn(c3cn, cdev, dst); - c3cn->err = 0; - c3cn_reset_flag(c3cn, C3CN_DONE); + c3cn->copied_seq = c3cn->rcv_wup = c3cn->rcv_nxt = rcv_isn; + c3cn_established(c3cn, ntohl(req->snd_isn), ntohs(req->tcp_opt)); - mk_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t); - l2t_send(cdev, skb, c3cn->l2t); - return 0; + __kfree_skb(skb); -free_l2t: - l2t_release(L2DATA(cdev), c3cn->l2t); -free_tid: - free_atid(cdev, c3cn->tid); - c3cn->tid = 0; -out_err: - return -1; + if (unlikely(c3cn_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED))) + /* upper layer has requested closing */ + send_abort_req(c3cn); + else if (c3cn_push_tx_frames(c3cn, 1)) + cxgb3i_conn_tx_open(c3cn); } -/* - * Close a connection by sending a CPL_CLOSE_CON_REQ 
message. Cannot fail - * under any circumstances. We take the easy way out and always queue the - * message to the write_queue. We can optimize the case where the queue is - * already empty though the optimization is probably not worth it. - */ -static void mk_close_req(struct s3_conn *c3cn) +static int do_act_establish(struct t3cdev *cdev, struct sk_buff *skb, + void *ctx) { - struct sk_buff *skb; - struct cpl_close_con_req *req; - unsigned int tid = c3cn->tid; + struct cpl_act_establish *req = cplhdr(skb); + unsigned int tid = GET_TID(req); + unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); + struct s3_conn *c3cn = ctx; + struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev); - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); + c3cn_conn_debug("rcv, tid 0x%x, c3cn 0x%p, s %u, f 0x%lx.\n", + tid, c3cn, c3cn->state, c3cn->flags); - skb = alloc_skb(sizeof(struct cpl_close_con_req), - GFP_KERNEL | __GFP_NOFAIL); - req = (struct cpl_close_con_req *)__skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); - req->wr.wr_lo = htonl(V_WR_TID(tid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); - req->rsvd = htonl(c3cn->write_seq); + c3cn->tid = tid; + c3cn_hold(c3cn); + cxgb3_insert_tid(cdata->cdev, cdata->client, c3cn, tid); + s3_free_atid(cdev, atid); - skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND); - if (c3cn->state != C3CN_STATE_SYN_SENT) - s3_push_frames(c3cn, 1); -} + c3cn->qset = G_QNUM(ntohl(skb->csum)); -static void skb_entail(struct s3_conn *c3cn, struct sk_buff *skb, - int flags) -{ - CXGB3_SKB_CB(skb)->seq = c3cn->write_seq; - CXGB3_SKB_CB(skb)->flags = flags; - __skb_queue_tail(&c3cn->write_queue, skb); + process_cpl_msg(process_act_establish, c3cn, skb); + return 0; } /* - * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are - * permitted to return without sending the message in case we cannot allocate - * an sk_buff. 
Returns the number of credits sent. + * Process a CPL_ACT_OPEN_RPL message: -> host + * Handle active open failures. */ -static u32 s3_send_rx_credits(struct s3_conn *c3cn, u32 credits, u32 dack, - int nofail) +static int act_open_rpl_status_to_errno(int status) +{ + switch (status) { + case CPL_ERR_CONN_RESET: + return ECONNREFUSED; + case CPL_ERR_ARP_MISS: + return EHOSTUNREACH; + case CPL_ERR_CONN_TIMEDOUT: + return ETIMEDOUT; + case CPL_ERR_TCAM_FULL: + return ENOMEM; + case CPL_ERR_CONN_EXIST: + cxgb3i_log_error("ACTIVE_OPEN_RPL: 4-tuple in use\n"); + return EADDRINUSE; + default: + return EIO; + } +} + +static void act_open_retry_timer(unsigned long data) { struct sk_buff *skb; - struct cpl_rx_data_ack *req; + struct s3_conn *c3cn = (struct s3_conn *)data; - skb = (nofail ? alloc_ctrl_skb(c3cn, sizeof(*req)) - : alloc_skb(sizeof(*req), GFP_ATOMIC)); - if (!skb) - return 0; + c3cn_conn_debug("c3cn 0x%p, state %u.\n", c3cn, c3cn->state); - req = (struct cpl_rx_data_ack *)__skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, c3cn->tid)); - req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); - skb->priority = CPL_PRIORITY_ACK; - cxgb3_ofld_send(c3cn->cdev, skb); - return credits; + spin_lock_bh(&c3cn->lock); + skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_ATOMIC); + if (!skb) + fail_act_open(c3cn, ENOMEM); + else { + skb->sk = (struct sock *)c3cn; + set_arp_failure_handler(skb, act_open_req_arp_failure); + make_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t); + l2t_send(c3cn->cdev, skb, c3cn->l2t); + } + spin_unlock_bh(&c3cn->lock); + c3cn_put(c3cn); } -static void mk_act_open_req(struct s3_conn *c3cn, struct sk_buff *skb, - unsigned int atid, const struct l2t_entry *e) +static void process_act_open_rpl(struct s3_conn *c3cn, struct sk_buff *skb) { - struct cpl_act_open_req *req; + struct cpl_act_open_rpl *rpl = cplhdr(skb); - c3cn_conn_debug("c3cn 0x%p, atid 0x%x.\n", 
c3cn, atid); + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); - skb->priority = CPL_PRIORITY_SETUP; - req = (struct cpl_act_open_req *)__skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); - req->local_port = c3cn->saddr.sin_port; - req->peer_port = c3cn->daddr.sin_port; - req->local_ip = c3cn->saddr.sin_addr.s_addr; - req->peer_ip = c3cn->daddr.sin_addr.s_addr; - req->opt0h = htonl(calc_opt0h(c3cn) | V_L2T_IDX(e->idx) | - V_TX_CHANNEL(e->smt_idx)); - req->opt0l = htonl(calc_opt0l(c3cn)); - req->params = 0; + if (rpl->status == CPL_ERR_CONN_EXIST && + c3cn->retry_timer.function != act_open_retry_timer) { + c3cn->retry_timer.function = act_open_retry_timer; + if (!mod_timer(&c3cn->retry_timer, jiffies + HZ / 2)) + c3cn_hold(c3cn); + } else + fail_act_open(c3cn, act_open_rpl_status_to_errno(rpl->status)); + __kfree_skb(skb); } -/* - * Definitions and declarations for CPL handler functions. - * ======================================================= - */ - -/* - * Similar to process_cpl_msg() but takes an extra connection reference around - * the call to the handler. Should be used if the handler may drop a - * connection reference. 
- */ -static inline void process_cpl_msg_ref(void (*fn) (struct s3_conn *, - struct sk_buff *), - struct s3_conn *c3cn, - struct sk_buff *skb) +static int do_act_open_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) { - c3cn_hold(c3cn); - process_cpl_msg(fn, c3cn, skb); - c3cn_put(c3cn); -} + struct s3_conn *c3cn = ctx; + struct cpl_act_open_rpl *rpl = cplhdr(skb); -/* - * Return whether a failed active open has allocated a TID - */ -static inline int act_open_has_tid(int status) -{ - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; -} + c3cn_conn_debug("rcv, status 0x%x, c3cn 0x%p, s %u, f 0x%lx.\n", + rpl->status, c3cn, c3cn->state, c3cn->flags); -/* - * Returns true if a connection cannot accept new Rx data. - */ -static inline int c3cn_no_receive(const struct s3_conn *c3cn) -{ - return c3cn->shutdown & C3CN_RCV_SHUTDOWN; + if (rpl->status != CPL_ERR_TCAM_FULL && + rpl->status != CPL_ERR_CONN_EXIST && + rpl->status != CPL_ERR_ARP_MISS) + cxgb3_queue_tid_release(cdev, GET_TID(rpl)); + + process_cpl_msg_ref(process_act_open_rpl, c3cn, skb); + return 0; } /* - * A helper function that aborts a connection and increments the given MIB - * counter. The supplied skb is used to generate the ABORT_REQ message if - * possible. Must be called with softirqs disabled. + * Process PEER_CLOSE CPL messages: -> host + * Handle peer FIN. 
*/ -static inline void abort_conn(struct s3_conn *c3cn, - struct sk_buff *skb) +static void process_peer_close(struct s3_conn *c3cn, struct sk_buff *skb) { - struct sk_buff *abort_skb; - - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", c3cn, c3cn->state, c3cn->flags); - abort_skb = __get_cpl_reply_skb(skb, sizeof(struct cpl_abort_req), - GFP_ATOMIC); - if (abort_skb) - s3_send_reset(c3cn, CPL_ABORT_SEND_RST, abort_skb); -} + if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) + goto out; -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static inline int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} + switch (c3cn->state) { + case C3CN_STATE_ESTABLISHED: + c3cn_set_state(c3cn, C3CN_STATE_PASSIVE_CLOSE); + break; + case C3CN_STATE_ACTIVE_CLOSE: + c3cn_set_state(c3cn, C3CN_STATE_CLOSE_WAIT_2); + break; + case C3CN_STATE_CLOSE_WAIT_1: + c3cn_closed(c3cn); + break; + case C3CN_STATE_ABORTING: + break; + default: + cxgb3i_log_error("%s: peer close, TID %u in bad state %u\n", + c3cn->cdev->name, c3cn->tid, c3cn->state); + } -/* - * CPL handler functions. - * ====================== - */ + cxgb3i_conn_closing(c3cn); +out: + __kfree_skb(skb); +} -/* - * Process a CPL_ACT_ESTABLISH message. - */ -static int do_act_establish(struct t3cdev *cdev, struct sk_buff *skb, - void *ctx) +static int do_peer_close(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) { - struct cpl_act_establish *req = cplhdr(skb); - unsigned int tid = GET_TID(req); - unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); struct s3_conn *c3cn = ctx; - struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev); - - c3cn_conn_debug("rcv, tid 0x%x, c3cn 0x%p, 0x%x, 0x%lx.\n", - tid, c3cn, c3cn->state, c3cn->flags); - /* - * It's OK if the TID is currently in use, the owning connection may - * have backlogged its last CPL message(s). 
Just take it away. - */ - c3cn->tid = tid; - c3cn_insert_tid(cdata, c3cn, tid); - free_atid(cdev, atid); - c3cn->qset = G_QNUM(ntohl(skb->csum)); - - process_cpl_msg(c3cn_act_establish, c3cn, skb); + c3cn_conn_debug("rcv, c3cn 0x%p, s %u, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + process_cpl_msg_ref(process_peer_close, c3cn, skb); return 0; } /* - * Process an ACT_OPEN_RPL CPL message. + * Process CLOSE_CONN_RPL CPL message: -> host + * Process a peer ACK to our FIN. */ -static int do_act_open_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) +static void process_close_con_rpl(struct s3_conn *c3cn, struct sk_buff *skb) { - struct s3_conn *c3cn = ctx; - struct cpl_act_open_rpl *rpl = cplhdr(skb); + struct cpl_close_con_rpl *rpl = cplhdr(skb); - c3cn_conn_debug("rcv, status 0x%x, c3cn 0x%p, 0x%x, 0x%lx.\n", - rpl->status, c3cn, c3cn->state, c3cn->flags); + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); - if (act_open_has_tid(rpl->status)) - cxgb3_queue_tid_release(cdev, GET_TID(rpl)); + c3cn->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ - process_cpl_msg_ref(active_open_failed, c3cn, skb); - return 0; + if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) + goto out; + + switch (c3cn->state) { + case C3CN_STATE_ACTIVE_CLOSE: + c3cn_set_state(c3cn, C3CN_STATE_CLOSE_WAIT_1); + break; + case C3CN_STATE_CLOSE_WAIT_1: + case C3CN_STATE_CLOSE_WAIT_2: + c3cn_closed(c3cn); + break; + case C3CN_STATE_ABORTING: + break; + default: + cxgb3i_log_error("%s: close_rpl, TID %u in bad state %u\n", + c3cn->cdev->name, c3cn->tid, c3cn->state); + } + +out: + kfree_skb(skb); } -/* - * Handler RX_ISCSI_HDR CPL messages. 
- */ -static int do_iscsi_hdr(struct t3cdev *t3dev, struct sk_buff *skb, void *ctx) +static int do_close_con_rpl(struct t3cdev *cdev, struct sk_buff *skb, + void *ctx) { struct s3_conn *c3cn = ctx; - process_cpl_msg(process_rx_iscsi_hdr, c3cn, skb); + + c3cn_conn_debug("rcv, c3cn 0x%p, s %u, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + process_cpl_msg_ref(process_close_con_rpl, c3cn, skb); return 0; } /* - * Handler for TX_DATA_ACK CPL messages. + * Process ABORT_REQ_RSS CPL message: -> host + * Process abort requests. If we are waiting for an ABORT_RPL we ignore this + * request except that we need to reply to it. */ -static int do_wr_ack(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) -{ - struct s3_conn *c3cn = ctx; - process_cpl_msg(wr_ack, c3cn, skb); - return 0; +static int abort_status_to_errno(struct s3_conn *c3cn, int abort_reason, + int *need_rst) +{ + switch (abort_reason) { + case CPL_ERR_BAD_SYN: /* fall through */ + case CPL_ERR_CONN_RESET: + return c3cn->state > C3CN_STATE_ESTABLISHED ? + EPIPE : ECONNRESET; + case CPL_ERR_XMIT_TIMEDOUT: + case CPL_ERR_PERSIST_TIMEDOUT: + case CPL_ERR_FINWAIT2_TIMEDOUT: + case CPL_ERR_KEEPALIVE_TIMEDOUT: + return ETIMEDOUT; + default: + return EIO; + } } -/* - * Handler for PEER_CLOSE CPL messages. 
- */ -static int do_peer_close(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) +static void process_abort_req(struct s3_conn *c3cn, struct sk_buff *skb) { - struct s3_conn *c3cn = ctx; + int rst_status = CPL_ABORT_NO_RST; + const struct cpl_abort_req_rss *req = cplhdr(skb); - c3cn_conn_debug("rcv, c3cn 0x%p, 0x%x, 0x%lx.\n", + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", c3cn, c3cn->state, c3cn->flags); - process_cpl_msg_ref(do_peer_fin, c3cn, skb); - return 0; + + if (!c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD)) { + c3cn_set_flag(c3cn, C3CN_ABORT_REQ_RCVD); + c3cn_set_state(c3cn, C3CN_STATE_ABORTING); + __kfree_skb(skb); + return; + } + + c3cn_clear_flag(c3cn, C3CN_ABORT_REQ_RCVD); + send_abort_rpl(c3cn, rst_status); + + if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) { + c3cn->err = + abort_status_to_errno(c3cn, req->status, &rst_status); + c3cn_closed(c3cn); + } } -/* - * Handle an ABORT_REQ_RSS CPL message. - */ static int do_abort_req(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) { const struct cpl_abort_req_rss *req = cplhdr(skb); struct s3_conn *c3cn = ctx; - c3cn_conn_debug("rcv, c3cn 0x%p, 0x%x, 0x%lx.\n", + c3cn_conn_debug("rcv, c3cn 0x%p, s 0x%x, f 0x%lx.\n", c3cn, c3cn->state, c3cn->flags); - if (is_neg_adv_abort(req->status)) { + if (req->status == CPL_ERR_RTX_NEG_ADVICE || + req->status == CPL_ERR_PERSIST_NEG_ADVICE) { __kfree_skb(skb); return 0; } @@ -1352,14 +999,38 @@ static int do_abort_req(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) } /* - * Handle an ABORT_RPL_RSS CPL message. + * Process ABORT_RPL_RSS CPL message: -> host + * Process abort replies. We only process these messages if we anticipate + * them as the coordination between SW and HW in this area is somewhat lacking + * and sometimes we get ABORT_RPLs after we are done with the connection that + * originated the ABORT_REQ. 
*/ +static void process_abort_rpl(struct s3_conn *c3cn, struct sk_buff *skb) +{ + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) { + if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_RCVD)) + c3cn_set_flag(c3cn, C3CN_ABORT_RPL_RCVD); + else { + c3cn_clear_flag(c3cn, C3CN_ABORT_RPL_RCVD); + c3cn_clear_flag(c3cn, C3CN_ABORT_RPL_PENDING); + if (c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD)) + cxgb3i_log_error("%s tid %u, ABORT_RPL_RSS\n", + c3cn->cdev->name, c3cn->tid); + c3cn_closed(c3cn); + } + } + __kfree_skb(skb); +} + static int do_abort_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) { struct cpl_abort_rpl_rss *rpl = cplhdr(skb); struct s3_conn *c3cn = ctx; - c3cn_conn_debug("rcv, status 0x%x, c3cn 0x%p, 0x%x, 0x%lx.\n", + c3cn_conn_debug("rcv, status 0x%x, c3cn 0x%p, s %u, 0x%lx.\n", rpl->status, c3cn, c3cn ? c3cn->state : 0, c3cn ? c3cn->flags : 0UL); @@ -1377,7 +1048,7 @@ static int do_abort_rpl(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) * abort races with ABORT_REQ_RSS, the latter frees the connection * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, * but FW turns the ABORT_REQ into a regular one and so we get - * ABORT_RPL_RSS with status 0 and no connection. Only on T3A. + * ABORT_RPL_RSS with status 0 and no connection. */ if (!c3cn) goto discard; @@ -1391,144 +1062,11 @@ discard: } /* - * Handler for CLOSE_CON_RPL CPL messages. + * Process RX_ISCSI_HDR CPL message: -> host + * Handle received PDUs, the payload could be DDP'ed. If not, the payload + * follow after the bhs. */ -static int do_close_con_rpl(struct t3cdev *cdev, struct sk_buff *skb, - void *ctx) -{ - struct s3_conn *c3cn = ctx; - - c3cn_conn_debug("rcv, c3cn 0x%p, 0x%x, 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); - - process_cpl_msg_ref(process_close_con_rpl, c3cn, skb); - return 0; -} - -/* - * Definitions and declarations for CPL message processing. 
- * ======================================================== - */ - -static void make_established(struct s3_conn *, u32, unsigned int); -static void act_open_retry_timer(unsigned long); -static void mk_act_open_req(struct s3_conn *, struct sk_buff *, - unsigned int, const struct l2t_entry *); -static int act_open_rpl_status_to_errno(int); -static void handle_excess_rx(struct s3_conn *, struct sk_buff *); -static int abort_status_to_errno(struct s3_conn *, int, int *); -static void send_abort_rpl(struct sk_buff *, struct t3cdev *, int); -static struct sk_buff *get_cpl_reply_skb(struct sk_buff *, size_t, gfp_t); - -/* - * Dequeue and return the first unacknowledged's WR on a connections's pending - * list. - */ -static inline struct sk_buff *dequeue_wr(struct s3_conn *c3cn) -{ - struct sk_buff *skb = c3cn->wr_pending_head; - - if (likely(skb)) { - /* Don't bother clearing the tail */ - c3cn->wr_pending_head = (struct sk_buff *)skb->sp; - skb->sp = NULL; - } - return skb; -} - -/* - * Return the first pending WR without removing it from the list. - */ -static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn) -{ - return c3cn->wr_pending_head; -} - -static inline void free_wr_skb(struct sk_buff *skb) -{ - kfree_skb(skb); -} - -static void purge_wr_queue(struct s3_conn *c3cn) -{ - struct sk_buff *skb; - while ((skb = dequeue_wr(c3cn)) != NULL) - free_wr_skb(skb); -} - -static inline void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid, - int cmd) -{ - struct cpl_abort_rpl *rpl = cplhdr(skb); - - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(tid)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); - rpl->cmd = cmd; -} - -/* - * CPL message processing ... - * ========================== - */ - -/* - * Updates connection state from an active establish CPL message. Runs with - * the connection lock held. 
- */ -static void c3cn_act_establish(struct s3_conn *c3cn, - struct sk_buff *skb) -{ - struct cpl_act_establish *req = cplhdr(skb); - u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ - - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); - - if (unlikely(c3cn->state != C3CN_STATE_SYN_SENT)) - printk(KERN_ERR "TID %u expected SYN_SENT, found %d\n", - c3cn->tid, c3cn->state); - - c3cn->copied_seq = c3cn->rcv_wup = c3cn->rcv_nxt = rcv_isn; - make_established(c3cn, ntohl(req->snd_isn), ntohs(req->tcp_opt)); - - if (unlikely(c3cn_flag(c3cn, C3CN_CLOSE_NEEDED))) { - /* upper layer has requested closing */ - abort_conn(c3cn, skb); - return; - } - - __kfree_skb(skb); - if (s3_push_frames(c3cn, 1)) - cxgb3i_conn_tx_open(c3cn); -} - -/* - * Handle active open failures. - */ -static void active_open_failed(struct s3_conn *c3cn, - struct sk_buff *skb) -{ - struct cpl_act_open_rpl *rpl = cplhdr(skb); - - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); - - if (rpl->status == CPL_ERR_CONN_EXIST && - c3cn->retry_timer.function != act_open_retry_timer) { - c3cn->retry_timer.function = act_open_retry_timer; - c3cn_reset_timer(c3cn, &c3cn->retry_timer, - jiffies + HZ / 2); - } else - fail_act_open(c3cn, act_open_rpl_status_to_errno(rpl->status)); - __kfree_skb(skb); -} - -/* - * Process received pdu for a connection. 
- */ -static void process_rx_iscsi_hdr(struct s3_conn *c3cn, - struct sk_buff *skb) +static void process_rx_iscsi_hdr(struct s3_conn *c3cn, struct sk_buff *skb) { struct cpl_iscsi_hdr *hdr_cpl = cplhdr(skb); struct cpl_iscsi_hdr_norss data_cpl; @@ -1537,8 +1075,10 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, unsigned int len; int err; - if (unlikely(c3cn_no_receive(c3cn))) { - handle_excess_rx(c3cn, skb); + if (unlikely(c3cn->state >= C3CN_STATE_PASSIVE_CLOSE)) { + if (c3cn->state != C3CN_STATE_ABORTING) + send_abort_req(c3cn); + __kfree_skb(skb); return; } @@ -1551,8 +1091,9 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, len = hdr_len = ntohs(hdr_cpl->len); /* msg coalesce is off or not enough data received */ if (skb->len <= hdr_len) { - printk(KERN_ERR "%s: TID %u, ISCSI_HDR, skb len %u < %u.\n", - c3cn->cdev->name, c3cn->tid, skb->len, hdr_len); + cxgb3i_log_error("%s: TID %u, ISCSI_HDR, skb len %u < %u.\n", + c3cn->cdev->name, c3cn->tid, + skb->len, hdr_len); goto abort_conn; } @@ -1566,6 +1107,9 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, skb_ulp_ddigest(skb) = ntohl(ddp_cpl.ulp_crc); status = ntohl(ddp_cpl.ddp_status); + c3cn_rx_debug("rx skb 0x%p, len %u, pdulen %u, ddp status 0x%x.\n", + skb, skb->len, skb_ulp_pdulen(skb), status); + if (status & (1 << RX_DDP_STATUS_HCRC_SHIFT)) skb_ulp_mode(skb) |= ULP2_FLAG_HCRC_ERROR; if (status & (1 << RX_DDP_STATUS_DCRC_SHIFT)) @@ -1590,15 +1134,24 @@ static void process_rx_iscsi_hdr(struct s3_conn *c3cn, return; abort_conn: - s3_send_reset(c3cn, CPL_ABORT_SEND_RST, NULL); + send_abort_req(c3cn); __kfree_skb(skb); } +static int do_iscsi_hdr(struct t3cdev *t3dev, struct sk_buff *skb, void *ctx) +{ + struct s3_conn *c3cn = ctx; + + process_cpl_msg(process_rx_iscsi_hdr, c3cn, skb); + return 0; +} + /* + * Process TX_DATA_ACK CPL messages: -> host * Process an acknowledgment of WR completion. Advance snd_una and send the * next batch of work requests from the write queue. 
*/ -static void wr_ack(struct s3_conn *c3cn, struct sk_buff *skb) +static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb) { struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); @@ -1612,9 +1165,9 @@ static void wr_ack(struct s3_conn *c3cn, struct sk_buff *skb) struct sk_buff *p = peek_wr(c3cn); if (unlikely(!p)) { - printk(KERN_ERR "%u WR_ACK credits for TID %u with " - "nothing pending, state %u\n", - credits, c3cn->tid, c3cn->state); + cxgb3i_log_error("%u WR_ACK credits for TID %u with " + "nothing pending, state %u\n", + credits, c3cn->tid, c3cn->state); break; } if (unlikely(credits < p->csum)) { @@ -1633,186 +1186,262 @@ static void wr_ack(struct s3_conn *c3cn, struct sk_buff *skb) if (c3cn->snd_una != snd_una) { c3cn->snd_una = snd_una; dst_confirm(c3cn->dst_cache); - if (c3cn->snd_una == c3cn->snd_nxt) - c3cn_reset_flag(c3cn, C3CN_TX_WAIT_IDLE); } - if (skb_queue_len(&c3cn->write_queue) && s3_push_frames(c3cn, 0)) + if (skb_queue_len(&c3cn->write_queue) && c3cn_push_tx_frames(c3cn, 0)) cxgb3i_conn_tx_open(c3cn); out_free: __kfree_skb(skb); } +static int do_wr_ack(struct t3cdev *cdev, struct sk_buff *skb, void *ctx) +{ + struct s3_conn *c3cn = ctx; + + process_cpl_msg(process_wr_ack, c3cn, skb); + return 0; +} + /* - * Handle a peer FIN. + * for each connection, pre-allocate skbs needed for close/abort requests. So + * that we can service the request right away. 
*/ -static void do_peer_fin(struct s3_conn *c3cn, struct sk_buff *skb) +static void c3cn_free_cpl_skbs(struct s3_conn *c3cn) { - int keep = 0; + if (c3cn->cpl_close) + kfree_skb(c3cn->cpl_close); + if (c3cn->cpl_abort_req) + kfree_skb(c3cn->cpl_abort_req); + if (c3cn->cpl_abort_rpl) + kfree_skb(c3cn->cpl_abort_rpl); +} - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); +static int c3cn_alloc_cpl_skbs(struct s3_conn *c3cn) +{ + c3cn->cpl_close = alloc_skb(sizeof(struct cpl_close_con_req), + GFP_KERNEL); + if (!c3cn->cpl_close) + return -ENOMEM; + skb_put(c3cn->cpl_close, sizeof(struct cpl_close_con_req)); - if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) - goto out; + c3cn->cpl_abort_req = alloc_skb(sizeof(struct cpl_abort_req), + GFP_KERNEL); + if (!c3cn->cpl_abort_req) + goto free_cpl_skbs; + skb_put(c3cn->cpl_abort_req, sizeof(struct cpl_abort_req)); - c3cn->shutdown |= C3CN_RCV_SHUTDOWN; - c3cn_set_flag(c3cn, C3CN_DONE); + c3cn->cpl_abort_rpl = alloc_skb(sizeof(struct cpl_abort_rpl), + GFP_KERNEL); + if (!c3cn->cpl_abort_rpl) + goto free_cpl_skbs; + skb_put(c3cn->cpl_abort_rpl, sizeof(struct cpl_abort_rpl)); - switch (c3cn->state) { - case C3CN_STATE_ESTABLISHED: - break; - case C3CN_STATE_CLOSING: - c3cn_done(c3cn); - break; - default: - printk(KERN_ERR - "%s: TID %u received PEER_CLOSE in bad state %d\n", - c3cn->cdev->name, c3cn->tid, c3cn->state); - } + return 0; - cxgb3i_conn_closing(c3cn); -out: - if (!keep) - __kfree_skb(skb); +free_cpl_skbs: + c3cn_free_cpl_skbs(c3cn); + return -ENOMEM; } -/* - * Process abort requests. If we are waiting for an ABORT_RPL we ignore this - * request except that we need to reply to it. +/** + * c3cn_release_offload_resources - + * Release resources held by an offload connection (TID, L2T entry, etc.) + * @c3cn: the offloaded iscsi tcp connection. 
*/ -static void process_abort_req(struct s3_conn *c3cn, - struct sk_buff *skb) +static void c3cn_release_offload_resources(struct s3_conn *c3cn) { - int rst_status = CPL_ABORT_NO_RST; - const struct cpl_abort_req_rss *req = cplhdr(skb); - - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); + struct t3cdev *cdev = c3cn->cdev; + unsigned int tid = c3cn->tid; - if (!c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD)) { - c3cn_set_flag(c3cn, C3CN_ABORT_REQ_RCVD); - c3cn_set_flag(c3cn, C3CN_ABORT_SHUTDOWN); - __kfree_skb(skb); + if (!cdev) return; - } - c3cn_reset_flag(c3cn, C3CN_ABORT_REQ_RCVD); - /* - * Three cases to consider: - * a) We haven't sent an abort_req; close the connection. - * b) We have sent a post-close abort_req that will get to TP too late - * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will - * be ignored and the connection should be closed now. - * c) We have sent a regular abort_req that will get to TP too late. - * That will generate an abort_rpl with status 0, wait for it. - */ - send_abort_rpl(skb, c3cn->cdev, rst_status); + c3cn->qset = 0; - if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) { - c3cn->err = - abort_status_to_errno(c3cn, req->status, &rst_status); + c3cn_free_cpl_skbs(c3cn); - c3cn_done(c3cn); + if (c3cn->wr_avail != c3cn->wr_max) { + purge_wr_queue(c3cn); + reset_wr_list(c3cn); } -} -/* - * Process abort replies. We only process these messages if we anticipate - * them as the coordination between SW and HW in this area is somewhat lacking - * and sometimes we get ABORT_RPLs after we are done with the connection that - * originated the ABORT_REQ. 
- */ -static void process_abort_rpl(struct s3_conn *c3cn, - struct sk_buff *skb) -{ - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); + if (c3cn->l2t) { + l2t_release(L2DATA(cdev), c3cn->l2t); + c3cn->l2t = NULL; + } - if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) { - if (!c3cn_flag(c3cn, C3CN_ABORT_RPL_RCVD)) - c3cn_set_flag(c3cn, C3CN_ABORT_RPL_RCVD); - else { - c3cn_reset_flag(c3cn, C3CN_ABORT_RPL_RCVD); - c3cn_reset_flag(c3cn, C3CN_ABORT_RPL_PENDING); - BUG_ON(c3cn_flag(c3cn, C3CN_ABORT_REQ_RCVD)); - c3cn_done(c3cn); - } + if (c3cn->state == C3CN_STATE_CONNECTING) /* we have ATID */ + s3_free_atid(cdev, tid); + else { /* we have TID */ + cxgb3_remove_tid(cdev, (void *)c3cn, tid); + c3cn_put(c3cn); } - __kfree_skb(skb); + + c3cn->cdev = NULL; } -/* - * Process a peer ACK to our FIN. +/** + * cxgb3i_c3cn_create - allocate and initialize an s3_conn structure + * returns the s3_conn structure allocated. */ -static void process_close_con_rpl(struct s3_conn *c3cn, - struct sk_buff *skb) +struct s3_conn *cxgb3i_c3cn_create(void) { - struct cpl_close_con_rpl *rpl = cplhdr(skb); + struct s3_conn *c3cn; - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); + c3cn = kzalloc(sizeof(*c3cn), GFP_KERNEL); + if (!c3cn) + return NULL; - c3cn->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ + /* pre-allocate close/abort cpl, so we don't need to wait for memory + when close/abort is requested. 
*/ + if (c3cn_alloc_cpl_skbs(c3cn) < 0) + goto free_c3cn; - if (c3cn_flag(c3cn, C3CN_ABORT_RPL_PENDING)) - goto out; + c3cn_conn_debug("alloc c3cn 0x%p.\n", c3cn); - if (c3cn->state == C3CN_STATE_CLOSING) { - c3cn_done(c3cn); - } else - printk(KERN_ERR - "%s: TID %u received CLOSE_CON_RPL in bad state %d\n", - c3cn->cdev->name, c3cn->tid, c3cn->state); -out: - kfree_skb(skb); + c3cn->flags = 0; + spin_lock_init(&c3cn->lock); + atomic_set(&c3cn->refcnt, 1); + skb_queue_head_init(&c3cn->receive_queue); + skb_queue_head_init(&c3cn->write_queue); + setup_timer(&c3cn->retry_timer, NULL, (unsigned long)c3cn); + rwlock_init(&c3cn->callback_lock); + + return c3cn; + +free_c3cn: + kfree(c3cn); + return NULL; } -/* - * Random utility functions for CPL message processing ... - * ======================================================= - */ +static void c3cn_active_close(struct s3_conn *c3cn) +{ + int data_lost; + int close_req = 0; + + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + + dst_confirm(c3cn->dst_cache); + + c3cn_hold(c3cn); + spin_lock_bh(&c3cn->lock); + + data_lost = skb_queue_len(&c3cn->receive_queue); + __skb_queue_purge(&c3cn->receive_queue); + + switch (c3cn->state) { + case C3CN_STATE_CLOSED: + case C3CN_STATE_ACTIVE_CLOSE: + case C3CN_STATE_CLOSE_WAIT_1: + case C3CN_STATE_CLOSE_WAIT_2: + case C3CN_STATE_ABORTING: + /* nothing need to be done */ + break; + case C3CN_STATE_CONNECTING: + /* defer until cpl_act_open_rpl or cpl_act_establish */ + c3cn_set_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED); + break; + case C3CN_STATE_ESTABLISHED: + close_req = 1; + c3cn_set_state(c3cn, C3CN_STATE_ACTIVE_CLOSE); + break; + case C3CN_STATE_PASSIVE_CLOSE: + close_req = 1; + c3cn_set_state(c3cn, C3CN_STATE_CLOSE_WAIT_2); + break; + } + + if (close_req) { + if (data_lost) + /* Unread data was tossed, zap the connection. 
*/ + send_abort_req(c3cn); + else + send_close_req(c3cn); + } + + spin_unlock_bh(&c3cn->lock); + c3cn_put(c3cn); +} /** - * find_best_mtu - find the entry in the MTU table closest to an MTU - * @d: TOM state - * @mtu: the target MTU - * - * Returns the index of the value in the MTU table that is closest to but - * does not exceed the target MTU. + * cxgb3i_c3cn_release - close and release an iscsi tcp connection and any + * resource held + * @c3cn: the iscsi tcp connection */ -static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) +void cxgb3i_c3cn_release(struct s3_conn *c3cn) { - int i = 0; - - while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) - ++i; - return i; + c3cn_conn_debug("c3cn 0x%p, s %u, f 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + if (likely(c3cn->state != C3CN_STATE_CONNECTING)) + c3cn_active_close(c3cn); + else + c3cn_set_flag(c3cn, C3CN_ACTIVE_CLOSE_NEEDED); + c3cn_put(c3cn); } -static unsigned int select_mss(struct s3_conn *c3cn, unsigned int pmtu) +static int is_cxgb3_dev(struct net_device *dev) { - unsigned int idx; - struct dst_entry *dst = c3cn->dst_cache; - struct t3cdev *cdev = c3cn->cdev; - const struct t3c_data *td = T3C_DATA(cdev); - u16 advmss = dst_metric(dst, RTAX_ADVMSS); + struct cxgb3i_sdev_data *cdata; - if (advmss > pmtu - 40) - advmss = pmtu - 40; - if (advmss < td->mtus[0] - 40) - advmss = td->mtus[0] - 40; - idx = find_best_mtu(td, advmss + 40); - return idx; + write_lock(&cdata_rwlock); + list_for_each_entry(cdata, &cdata_list, list) { + struct adap_ports *ports = &cdata->ports; + int i; + + for (i = 0; i < ports->nports; i++) + if (dev == ports->lldevs[i]) { + write_unlock(&cdata_rwlock); + return 1; + } + } + write_unlock(&cdata_rwlock); + return 0; } -static void fail_act_open(struct s3_conn *c3cn, int errno) +/** + * cxgb3_egress_dev - return the cxgb3 egress device + * @root_dev: the root device anchoring the search + * @c3cn: the connection used to determine egress port in bonding mode + * 
@context: in bonding mode, indicates a connection set up or failover + * + * Return egress device or NULL if the egress device isn't one of our ports. + */ +static struct net_device *cxgb3_egress_dev(struct net_device *root_dev, + struct s3_conn *c3cn, + int context) { - c3cn_conn_debug("c3cn 0x%p, state 0x%x, flag 0x%lx.\n", - c3cn, c3cn->state, c3cn->flags); + while (root_dev) { + if (root_dev->priv_flags & IFF_802_1Q_VLAN) + root_dev = vlan_dev_real_dev(root_dev); + else if (is_cxgb3_dev(root_dev)) + return root_dev; + else + return NULL; + } + return NULL; +} - c3cn->err = errno; - c3cn_done(c3cn); +static struct rtable *find_route(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + struct rtable *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = daddr, + .saddr = saddr, + .tos = 0 } }, + .proto = IPPROTO_TCP, + .uli_u = { + .ports = { + .sport = sport, + .dport = dport } } }; + + if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) + return NULL; + return rt; } /* @@ -1827,195 +1456,355 @@ static void init_offload_conn(struct s3_conn *c3cn, c3cn->wr_unacked = 0; c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst)); - c3cn->ctrl_skb_cache = alloc_skb(CTRL_SKB_LEN, gfp_any()); reset_wr_list(c3cn); } -static void act_open_retry_timer(unsigned long data) +static int initiate_act_open(struct s3_conn *c3cn, struct net_device *dev) { + struct cxgb3i_sdev_data *cdata = NDEV2CDATA(dev); + struct t3cdev *cdev = cdata->cdev; + struct dst_entry *dst = c3cn->dst_cache; struct sk_buff *skb; - struct s3_conn *c3cn = (struct s3_conn *)data; - c3cn_conn_debug("c3cn 0x%p, state 0x%x.\n", c3cn, c3cn->state); + c3cn_conn_debug("c3cn 0x%p, state %u, flag 0x%lx.\n", + c3cn, c3cn->state, c3cn->flags); + /* + * Initialize connection data. Note that the flags and ULP mode are + * initialized higher up ... 
+ */ + c3cn->dev = dev; + c3cn->cdev = cdev; + c3cn->tid = cxgb3_alloc_atid(cdev, cdata->client, c3cn); + if (c3cn->tid < 0) + goto out_err; - spin_lock(&c3cn->lock); - skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_ATOMIC); + c3cn->qset = 0; + c3cn->l2t = t3_l2t_get(cdev, dst->neighbour, dev); + if (!c3cn->l2t) + goto free_tid; + + skb = alloc_skb(sizeof(struct cpl_act_open_req), GFP_KERNEL); if (!skb) - fail_act_open(c3cn, ENOMEM); - else { - skb->sk = (struct sock *)c3cn; - set_arp_failure_handler(skb, act_open_req_arp_failure); - mk_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t); - l2t_send(c3cn->cdev, skb, c3cn->l2t); - } - spin_unlock(&c3cn->lock); - c3cn_put(c3cn); -} + goto free_l2t; -/* - * Convert an ACT_OPEN_RPL status to a Linux errno. - */ -static int act_open_rpl_status_to_errno(int status) -{ - switch (status) { - case CPL_ERR_CONN_RESET: - return ECONNREFUSED; - case CPL_ERR_ARP_MISS: - return EHOSTUNREACH; - case CPL_ERR_CONN_TIMEDOUT: - return ETIMEDOUT; - case CPL_ERR_TCAM_FULL: - return ENOMEM; - case CPL_ERR_CONN_EXIST: - printk(KERN_ERR "ACTIVE_OPEN_RPL: 4-tuple in use\n"); - return EADDRINUSE; - default: - return EIO; - } + skb->sk = (struct sock *)c3cn; + set_arp_failure_handler(skb, act_open_req_arp_failure); + + c3cn_hold(c3cn); + + init_offload_conn(c3cn, cdev, dst); + c3cn->err = 0; + + make_act_open_req(c3cn, skb, c3cn->tid, c3cn->l2t); + l2t_send(cdev, skb, c3cn->l2t); + return 0; + +free_l2t: + l2t_release(L2DATA(cdev), c3cn->l2t); +free_tid: + s3_free_atid(cdev, c3cn->tid); + c3cn->tid = 0; +out_err: + return -1; } -/* - * Convert the status code of an ABORT_REQ into a Linux error code. Also - * indicate whether RST should be sent in response. + +/** + * cxgb3i_c3cn_connect - initiates an iscsi tcp connection to a given address + * @c3cn: the iscsi tcp connection + * @usin: destination address + * + * return 0 if active open request is sent, < 0 otherwise. 
*/ -static int abort_status_to_errno(struct s3_conn *c3cn, - int abort_reason, int *need_rst) +int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin) { - switch (abort_reason) { - case CPL_ERR_BAD_SYN: /* fall through */ - case CPL_ERR_CONN_RESET: - return c3cn->state == C3CN_STATE_CLOSING ? EPIPE : ECONNRESET; - case CPL_ERR_XMIT_TIMEDOUT: - case CPL_ERR_PERSIST_TIMEDOUT: - case CPL_ERR_FINWAIT2_TIMEDOUT: - case CPL_ERR_KEEPALIVE_TIMEDOUT: - return ETIMEDOUT; - default: - return EIO; + struct rtable *rt; + struct net_device *dev; + struct cxgb3i_sdev_data *cdata; + struct t3cdev *cdev; + __be32 sipv4; + int err; + + if (usin->sin_family != AF_INET) + return -EAFNOSUPPORT; + + c3cn->daddr.sin_port = usin->sin_port; + c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr; + + rt = find_route(c3cn->saddr.sin_addr.s_addr, + c3cn->daddr.sin_addr.s_addr, + c3cn->saddr.sin_port, + c3cn->daddr.sin_port); + if (rt == NULL) { + c3cn_conn_debug("NO route to 0x%x, port %u.\n", + c3cn->daddr.sin_addr.s_addr, + ntohs(c3cn->daddr.sin_port)); + return -ENETUNREACH; } -} -static void send_abort_rpl(struct sk_buff *skb, struct t3cdev *cdev, - int rst_status) -{ - struct sk_buff *reply_skb; - struct cpl_abort_req_rss *req = cplhdr(skb); + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n", + c3cn->daddr.sin_addr.s_addr, + ntohs(c3cn->daddr.sin_port)); + ip_rt_put(rt); + return -ENETUNREACH; + } - reply_skb = get_cpl_reply_skb(skb, sizeof(struct cpl_abort_rpl), - gfp_any()); + if (!c3cn->saddr.sin_addr.s_addr) + c3cn->saddr.sin_addr.s_addr = rt->rt_src; - reply_skb->priority = CPL_PRIORITY_DATA; - set_abort_rpl_wr(reply_skb, GET_TID(req), rst_status); - kfree_skb(skb); - cxgb3_ofld_send(cdev, reply_skb); -} + /* now commit destination to connection */ + c3cn->dst_cache = &rt->u.dst; -/* - * Returns an sk_buff for a reply CPL message of size len. 
If the input - * sk_buff has no other users it is trimmed and reused, otherwise a new buffer - * is allocated. The input skb must be of size at least len. Note that this - * operation does not destroy the original skb data even if it decides to reuse - * the buffer. - */ -static struct sk_buff *get_cpl_reply_skb(struct sk_buff *skb, size_t len, - gfp_t gfp) -{ - if (likely(!skb_cloned(skb))) { - BUG_ON(skb->len < len); - __skb_trim(skb, len); - skb_get(skb); - } else { - skb = alloc_skb(len, gfp); - if (skb) - __skb_put(skb, len); + /* try to establish an offloaded connection */ + dev = cxgb3_egress_dev(c3cn->dst_cache->dev, c3cn, 0); + if (dev == NULL) { + c3cn_conn_debug("c3cn 0x%p, egress dev NULL.\n", c3cn); + return -ENETUNREACH; } - return skb; + cdata = NDEV2CDATA(dev); + cdev = cdata->cdev; + + /* get a source port if one hasn't been provided */ + err = c3cn_get_port(c3cn, cdata); + if (err) + return err; + + c3cn_conn_debug("c3cn 0x%p get port %u.\n", + c3cn, ntohs(c3cn->saddr.sin_port)); + + sipv4 = cxgb3i_get_private_ipv4addr(dev); + if (!sipv4) { + c3cn_conn_debug("c3cn 0x%p, iscsi ip not configured.\n", c3cn); + sipv4 = c3cn->saddr.sin_addr.s_addr; + cxgb3i_set_private_ipv4addr(dev, sipv4); + } else + c3cn->saddr.sin_addr.s_addr = sipv4; + + c3cn_conn_debug("c3cn 0x%p, %u.%u.%u.%u,%u-%u.%u.%u.%u,%u SYN_SENT.\n", + c3cn, NIPQUAD(c3cn->saddr.sin_addr.s_addr), + ntohs(c3cn->saddr.sin_port), + NIPQUAD(c3cn->daddr.sin_addr.s_addr), + ntohs(c3cn->daddr.sin_port)); + + c3cn_set_state(c3cn, C3CN_STATE_CONNECTING); + if (!initiate_act_open(c3cn, dev)) + return 0; + + /* + * If we get here, we don't have an offload connection so simply + * return a failure. + */ + err = -ENOTSUPP; + + /* + * This trashes the connection and releases the local port, + * if necessary. 
+ */ + c3cn_conn_debug("c3cn 0x%p -> CLOSED.\n", c3cn); + c3cn_set_state(c3cn, C3CN_STATE_CLOSED); + ip_rt_put(rt); + c3cn_put_port(c3cn); + c3cn->daddr.sin_port = 0; + return err; } -/* - * Release resources held by an offload connection (TID, L2T entry, etc.) +/** + * cxgb3i_c3cn_rx_credits - ack received tcp data. + * @c3cn: iscsi tcp connection + * @copied: # of bytes processed + * + * Called after some received data has been read. It returns RX credits + * to the HW for the amount of data processed. */ -static void t3_release_offload_resources(struct s3_conn *c3cn) +void cxgb3i_c3cn_rx_credits(struct s3_conn *c3cn, int copied) { - struct t3cdev *cdev = c3cn->cdev; - unsigned int tid = c3cn->tid; + struct t3cdev *cdev; + int must_send; + u32 credits, dack = 0; - if (!cdev) + if (c3cn->state != C3CN_STATE_ESTABLISHED) return; - c3cn->qset = 0; + credits = c3cn->copied_seq - c3cn->rcv_wup; + if (unlikely(!credits)) + return; - kfree_skb(c3cn->ctrl_skb_cache); - c3cn->ctrl_skb_cache = NULL; + cdev = c3cn->cdev; - if (c3cn->wr_avail != c3cn->wr_max) { - purge_wr_queue(c3cn); - reset_wr_list(c3cn); + if (unlikely(cxgb3_rx_credit_thres == 0)) + return; + + dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); + + /* + * For coalescing to work effectively ensure the receive window has + * at least 16KB left. + */ + must_send = credits + 16384 >= cxgb3_rcv_win; + + if (must_send || credits >= cxgb3_rx_credit_thres) + c3cn->rcv_wup += send_rx_credits(c3cn, credits, dack); +} + +/** + * cxgb3i_c3cn_send_pdus - send the skbs containing iscsi pdus + * @c3cn: iscsi tcp connection + * @skb: skb contains the iscsi pdu + * + * Add a list of skbs to a connection send queue. The skbs must comply with + * the max size limit of the device and have a headroom of at least + * TX_HEADER_LEN bytes. + * Return # of bytes queued. 
+ */ +int cxgb3i_c3cn_send_pdus(struct s3_conn *c3cn, struct sk_buff *skb) +{ + struct sk_buff *next; + int err, copied = 0; + + spin_lock_bh(&c3cn->lock); + + if (c3cn->state != C3CN_STATE_ESTABLISHED) { + c3cn_tx_debug("c3cn 0x%p, not in est. state %u.\n", + c3cn, c3cn->state); + err = -EAGAIN; + goto out_err; } - if (c3cn->l2t) { - l2t_release(L2DATA(cdev), c3cn->l2t); - c3cn->l2t = NULL; + err = -EPIPE; + if (c3cn->err) { + c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err); + goto out_err; } - if (c3cn->state == C3CN_STATE_SYN_SENT) /* we have ATID */ - free_atid(cdev, tid); - else { /* we have TID */ - cxgb3_remove_tid(cdev, (void *)c3cn, tid); - c3cn_put(c3cn); + while (skb) { + int frags = skb_shinfo(skb)->nr_frags + + (skb->len != skb->data_len); + + if (unlikely(skb_headroom(skb) < TX_HEADER_LEN)) { + c3cn_tx_debug("c3cn 0x%p, skb head.\n", c3cn); + err = -EINVAL; + goto out_err; + } + + if (frags >= SKB_WR_LIST_SIZE) { + cxgb3i_log_error("c3cn 0x%p, tx frags %d, len %u,%u.\n", + c3cn, skb_shinfo(skb)->nr_frags, + skb->len, skb->data_len); + err = -EINVAL; + goto out_err; + } + + next = skb->next; + skb->next = NULL; + skb_entail(c3cn, skb, C3CB_FLAG_NO_APPEND | C3CB_FLAG_NEED_HDR); + copied += skb->len; + c3cn->write_seq += skb->len + ulp_extra_len(skb); + skb = next; } +done: + if (likely(skb_queue_len(&c3cn->write_queue))) + c3cn_push_tx_frames(c3cn, 1); + spin_unlock_bh(&c3cn->lock); + return copied; - c3cn->cdev = NULL; +out_err: + if (copied == 0 && err == -EPIPE) + copied = c3cn->err ? c3cn->err : -EPIPE; + goto done; } -/* - * Handles Rx data that arrives in a state where the connection isn't - * accepting new data. 
- */ -static void handle_excess_rx(struct s3_conn *c3cn, struct sk_buff *skb) +static void sdev_data_cleanup(struct cxgb3i_sdev_data *cdata) { - if (!c3cn_flag(c3cn, C3CN_ABORT_SHUTDOWN)) - abort_conn(c3cn, skb); + struct adap_ports *ports = &cdata->ports; + int i; - kfree_skb(skb); + for (i = 0; i < ports->nports; i++) + NDEV2CDATA(ports->lldevs[i]) = NULL; + cxgb3i_free_big_mem(cdata); } -/* - * Like get_cpl_reply_skb() but the returned buffer starts out empty. - */ -static struct sk_buff *__get_cpl_reply_skb(struct sk_buff *skb, size_t len, - gfp_t gfp) +void cxgb3i_sdev_cleanup(void) { - if (likely(!skb_cloned(skb) && !skb->data_len)) { - __skb_trim(skb, 0); - skb_get(skb); - } else - skb = alloc_skb(len, gfp); - return skb; + struct cxgb3i_sdev_data *cdata; + + write_lock(&cdata_rwlock); + list_for_each_entry(cdata, &cdata_list, list) { + list_del(&cdata->list); + sdev_data_cleanup(cdata); + } + write_unlock(&cdata_rwlock); } -/* - * Completes some final bits of initialization for just established connections - * and changes their state to C3CN_STATE_ESTABLISHED. - * - * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. 
+int cxgb3i_sdev_init(cxgb3_cpl_handler_func *cpl_handlers) +{ + cpl_handlers[CPL_ACT_ESTABLISH] = do_act_establish; + cpl_handlers[CPL_ACT_OPEN_RPL] = do_act_open_rpl; + cpl_handlers[CPL_PEER_CLOSE] = do_peer_close; + cpl_handlers[CPL_ABORT_REQ_RSS] = do_abort_req; + cpl_handlers[CPL_ABORT_RPL_RSS] = do_abort_rpl; + cpl_handlers[CPL_CLOSE_CON_RPL] = do_close_con_rpl; + cpl_handlers[CPL_TX_DMA_ACK] = do_wr_ack; + cpl_handlers[CPL_ISCSI_HDR] = do_iscsi_hdr; + + if (cxgb3_max_connect > CXGB3I_MAX_CONN) + cxgb3_max_connect = CXGB3I_MAX_CONN; + return 0; +} + +/** + * cxgb3i_sdev_add - allocate and initialize resources for each adapter found + * @cdev: t3cdev adapter + * @client: cxgb3 driver client */ -static void make_established(struct s3_conn *c3cn, u32 snd_isn, - unsigned int opt) +void cxgb3i_sdev_add(struct t3cdev *cdev, struct cxgb3_client *client) { - c3cn_conn_debug("c3cn 0x%p, state 0x%x.\n", c3cn, c3cn->state); + struct cxgb3i_sdev_data *cdata; + struct ofld_page_info rx_page_info; + unsigned int wr_len; + int mapsize = DIV_ROUND_UP(cxgb3_max_connect, + 8 * sizeof(unsigned long)); + int i; - c3cn->write_seq = c3cn->snd_nxt = c3cn->snd_una = snd_isn; + cdata = cxgb3i_alloc_big_mem(sizeof(*cdata) + mapsize, GFP_KERNEL); + if (!cdata) + return; - /* - * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't - * pass through opt0. 
- */ - if (cxgb3_rcv_win > (M_RCV_BUFSIZ << 10)) - c3cn->rcv_wup -= cxgb3_rcv_win - (M_RCV_BUFSIZ << 10); + if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0 || + cdev->ctl(cdev, GET_PORTS, &cdata->ports) < 0 || + cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info) < 0) + goto free_cdata; - dst_confirm(c3cn->dst_cache); + s3_init_wr_tab(wr_len); - smp_mb(); - c3cn_set_state(c3cn, C3CN_STATE_ESTABLISHED); + INIT_LIST_HEAD(&cdata->list); + cdata->cdev = cdev; + cdata->client = client; + + for (i = 0; i < cdata->ports.nports; i++) + NDEV2CDATA(cdata->ports.lldevs[i]) = cdata; + + write_lock(&cdata_rwlock); + list_add_tail(&cdata->list, &cdata_list); + write_unlock(&cdata_rwlock); + + return; + +free_cdata: + cxgb3i_free_big_mem(cdata); +} + +/** + * cxgb3i_sdev_remove - free the allocated resources for the adapter + * @cdev: t3cdev adapter + */ +void cxgb3i_sdev_remove(struct t3cdev *cdev) +{ + struct cxgb3i_sdev_data *cdata = CXGB3_SDEV_DATA(cdev); + + write_lock(&cdata_rwlock); + list_del(&cdata->list); + write_unlock(&cdata_rwlock); + + sdev_data_cleanup(cdata); } diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h index 0151989..5b93d62 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_offload.h +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h @@ -1,12 +1,15 @@ /* - * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. + * cxgb3i_offload.h: Chelsio S3xx iscsi offloaded tcp connection management * - * Written by Dimitris Michailidis (dm@xxxxxxxxxxx) + * Copyright (C) 2003-2008 Chelsio Communications. All rights reserved. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this * release for licensing terms and conditions. 
+ * + * Written by: Dimitris Michailidis (dm@xxxxxxxxxxx) + * Karen Xie (kxie@xxxxxxxxxxx) */ #ifndef _CXGB3I_OFFLOAD_H @@ -23,83 +26,104 @@ #define cxgb3i_log_error(fmt...) printk(KERN_ERR "cxgb3i: ERR! " fmt) #define cxgb3i_log_warn(fmt...) printk(KERN_WARNING "cxgb3i: WARN! " fmt) #define cxgb3i_log_info(fmt...) printk(KERN_INFO "cxgb3i: " fmt) - -#ifdef __DEBUG_CXGB3I__ #define cxgb3i_log_debug(fmt, args...) \ printk(KERN_INFO "cxgb3i: %s - " fmt, __func__ , ## args) -#else -#define cxgb3i_log_debug(fmt...) -#endif -#ifdef __DEBUG_C3CN_CONN__ -#define c3cn_conn_debug cxgb3i_log_debug -#else -#define c3cn_conn_debug(fmt...) -#endif - -/* - * Data structure to keep track of cxgb3 connection. +/** + * struct s3_conn - an iscsi tcp connection structure + * + * @dev: net device of with connection + * @cdev: adapter t3cdev for net device + * @flags: see c3cn_flags below + * @tid: connection id assigned by the h/w + * @qset: queue set used by connection + * @mss_idx: Maximum Segment Size table index + * @l2t: ARP resolution entry for offload packets + * @wr_max: maximum in-flight writes + * @wr_avail: number of writes available + * @wr_unacked: writes since last request for completion notification + * @wr_pending_head: head of pending write queue + * @wr_pending_tail: tail of pending write queue + * @cpl_close: skb for cpl_close_req + * @cpl_abort_req: skb for cpl_abort_req + * @cpl_abort_rpl: skb for cpl_abort_rpl + * @lock: connection status lock + * @refcnt: reference count on connection + * @state: connection state + * @saddr: source ip/port address + * @daddr: destination ip/port address + * @dst_cache: reference to destination route + * @receive_queue: received PDUs + * @write_queue: un-pushed pending writes + * @retry_timer: retry timer for various operations + * @err: connection error status + * @callback_lock: lock for opaque user context + * @user_data: opaque user context + * @rcv_nxt: next receive seq. 
# + * @copied_seq: head of yet unread data + * @rcv_wup: rcv_nxt on last window update sent + * @snd_nxt: next sequence we send + * @snd_una: first byte we want an ack for + * @write_seq: tail+1 of data held in send buffer */ struct s3_conn { - struct net_device *dev; /* net device of with connection */ - struct t3cdev *cdev; /* adapter t3cdev for net device */ - unsigned long flags; /* see c3cn_flags below */ - int tid; /* ID of TCP Control Block */ - int qset; /* queue Set used by connection */ - int mss_idx; /* Maximum Segment Size table index */ - struct l2t_entry *l2t; /* ARP resolution for offload packets */ - int wr_max; /* maximum in-flight writes */ - int wr_avail; /* number of writes available */ - int wr_unacked; /* writes since last request for */ - /* completion notification */ - struct sk_buff *wr_pending_head;/* head of pending write queue */ - struct sk_buff *wr_pending_tail;/* tail of pending write queue */ - struct sk_buff *ctrl_skb_cache; /* single entry cached skb for */ - /* short-term control operations */ - spinlock_t lock; /* connection status lock */ - atomic_t refcnt; /* reference count on connection */ - volatile unsigned int state; /* connection state */ - struct sockaddr_in saddr; /* source IP/port address */ - struct sockaddr_in daddr; /* destination IP/port address */ - struct dst_entry *dst_cache; /* reference to destination route */ - unsigned char shutdown; /* shutdown status */ - struct sk_buff_head receive_queue;/* received PDUs */ - struct sk_buff_head write_queue;/* un-pushed pending writes */ - - struct timer_list retry_timer; /* retry timer for various operations */ - int err; /* connection error status */ - rwlock_t callback_lock; /* lock for opaque user context */ - void *user_data; /* opaque user context */ - - u32 rcv_nxt; /* what we want to receive next */ - u32 copied_seq; /* head of yet unread data */ - u32 rcv_wup; /* rcv_nxt on last window update sent */ - u32 snd_nxt; /* next sequence we send */ - u32 snd_una; /* 
first byte we want an ack for */ - - u32 write_seq; /* tail+1 of data held in send buffer */ + struct net_device *dev; + struct t3cdev *cdev; + unsigned long flags; + int tid; + int qset; + int mss_idx; + struct l2t_entry *l2t; + int wr_max; + int wr_avail; + int wr_unacked; + struct sk_buff *wr_pending_head; + struct sk_buff *wr_pending_tail; + struct sk_buff *cpl_close; + struct sk_buff *cpl_abort_req; + struct sk_buff *cpl_abort_rpl; + spinlock_t lock; + atomic_t refcnt; + volatile unsigned int state; + struct sockaddr_in saddr; + struct sockaddr_in daddr; + struct dst_entry *dst_cache; + struct sk_buff_head receive_queue; + struct sk_buff_head write_queue; + struct timer_list retry_timer; + int err; + rwlock_t callback_lock; + void *user_data; + + u32 rcv_nxt; + u32 copied_seq; + u32 rcv_wup; + u32 snd_nxt; + u32 snd_una; + u32 write_seq; }; -/* Flags in c3cn->shutdown */ -#define C3CN_RCV_SHUTDOWN 0x1 -#define C3CN_SEND_SHUTDOWN 0x2 -#define C3CN_SHUTDOWN_MASK (C3CN_RCV_SHUTDOWN | C3CN_SEND_SHUTDOWN) - /* - * connection state bitmap + * connection state */ -#define C3CN_STATE_CLOSE 0x1 -#define C3CN_STATE_SYN_SENT 0x2 -#define C3CN_STATE_ESTABLISHED 0x4 -#define C3CN_STATE_CLOSING 0x8 -#define C3CN_STATE_ABORING 0x10 - -#define C3CN_STATE_MASK 0xFF +enum conn_states { + C3CN_STATE_CONNECTING = 1, + C3CN_STATE_ESTABLISHED, + C3CN_STATE_ACTIVE_CLOSE, + C3CN_STATE_PASSIVE_CLOSE, + C3CN_STATE_CLOSE_WAIT_1, + C3CN_STATE_CLOSE_WAIT_2, + C3CN_STATE_ABORTING, + C3CN_STATE_CLOSED, +}; -static inline unsigned int c3cn_in_state(const struct s3_conn *c3cn, - unsigned int states) +static inline unsigned int c3cn_is_closing(const struct s3_conn *c3cn) { - return states & c3cn->state; + return c3cn->state >= C3CN_STATE_ACTIVE_CLOSE; +} +static inline unsigned int c3cn_is_established(const struct s3_conn *c3cn) +{ + return c3cn->state == C3CN_STATE_ESTABLISHED; } /* @@ -108,37 +132,35 @@ static inline unsigned int c3cn_in_state(const struct s3_conn *c3cn, enum c3cn_flags { 
C3CN_ABORT_RPL_RCVD, /* received one ABORT_RPL_RSS message */ C3CN_ABORT_REQ_RCVD, /* received one ABORT_REQ_RSS message */ - C3CN_TX_WAIT_IDLE, /* suspend Tx until in-flight data is ACKed */ - C3CN_ABORT_SHUTDOWN, /* shouldn't send more abort requests */ - C3CN_ABORT_RPL_PENDING, /* expecting an abort reply */ - C3CN_CLOSE_CON_REQUESTED, /* we've sent a close_conn_req */ C3CN_TX_DATA_SENT, /* already sent a TX_DATA WR */ - C3CN_CLOSE_NEEDED, /* need to be closed */ - C3CN_DONE, + C3CN_ACTIVE_CLOSE_NEEDED, /* need to be closed */ }; -/* - * Per adapter data. Linked off of each Ethernet device port on the adapter. +/** + * cxgb3i_sdev_data - Per adapter data. + * Linked off of each Ethernet device port on the adapter. * Also available via the t3cdev structure since we have pointers to our port * net_device's there ... + * + * @list: list head to link elements + * @cdev: t3cdev adapter + * @client: CPL client pointer + * @ports: array of adapter ports + * @sport_map_next: next index into the port map + * @sport_map: source port map */ struct cxgb3i_sdev_data { - struct list_head list; /* links for list of all adapters */ - struct t3cdev *cdev; /* adapter t3cdev */ - struct cxgb3_client *client; /* CPL client pointer */ - struct adap_ports *ports; /* array of adapter ports */ - unsigned int rx_page_size; /* RX page size */ - struct sk_buff_head deferq; /* queue for processing replies from */ - /* worker thread context */ - struct work_struct deferq_task; /* worker thread */ + struct list_head list; + struct t3cdev *cdev; + struct cxgb3_client *client; + struct adap_ports ports; + unsigned int sport_map_next; + unsigned long sport_map[0]; }; #define NDEV2CDATA(ndev) (*(struct cxgb3i_sdev_data **)&(ndev)->ec_ptr) #define CXGB3_SDEV_DATA(cdev) NDEV2CDATA((cdev)->lldev) -/* - * Primary API routines. 
- */ void cxgb3i_sdev_cleanup(void); int cxgb3i_sdev_init(cxgb3_cpl_handler_func *); void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *); @@ -147,20 +169,26 @@ void cxgb3i_sdev_remove(struct t3cdev *); struct s3_conn *cxgb3i_c3cn_create(void); int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *); void cxgb3i_c3cn_rx_credits(struct s3_conn *, int); -int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *, int); +int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *); void cxgb3i_c3cn_release(struct s3_conn *); -/* - * Definitions for sk_buff state and ULP mode management. +/** + * cxgb3_skb_cb - control block for received pdu state and ULP mode management. + * + * @flags: see C3CB_FLAG_* below + * @ulp_mode: ULP mode/submode of sk_buff + * @seq: tcp sequence number + * @ddigest: pdu data digest + * @pdulen: recovered pdu length + * @ulp_data: scratch area for ULP */ - struct cxgb3_skb_cb { - __u8 flags; /* see C3CB_FLAG_* below */ - __u8 ulp_mode; /* ULP mode/submode of sk_buff */ - __u32 seq; /* sequence number */ - __u32 ddigest; /* ULP rx_data_ddp selected field */ - __u32 pdulen; /* ULP rx_data_ddp selected field */ - __u8 ulp_data[16]; /* scratch area for ULP */ + __u8 flags; + __u8 ulp_mode; + __u32 seq; + __u32 ddigest; + __u32 pdulen; + __u8 ulp_data[16]; }; #define CXGB3_SKB_CB(skb) ((struct cxgb3_skb_cb *)&((skb)->cb[0])) @@ -170,28 +198,14 @@ struct cxgb3_skb_cb { #define skb_ulp_pdulen(skb) (CXGB3_SKB_CB(skb)->pdulen) #define skb_ulp_data(skb) (CXGB3_SKB_CB(skb)->ulp_data) -enum { +enum c3cb_flags { C3CB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */ C3CB_FLAG_NO_APPEND = 1 << 1, /* don't grow this skb */ - C3CB_FLAG_BARRIER = 1 << 2, /* set TX_WAIT_IDLE after sending */ - C3CB_FLAG_COMPL = 1 << 4, /* request WR completion */ + C3CB_FLAG_COMPL = 1 << 2, /* request WR completion */ }; -/* - * Top-level CPL message processing used by most CPL messages that - * pertain to connections. 
- */ -static inline void process_cpl_msg(void (*fn)(struct s3_conn *, - struct sk_buff *), - struct s3_conn *c3cn, - struct sk_buff *skb) -{ - spin_lock(&c3cn->lock); - fn(c3cn, skb); - spin_unlock(&c3cn->lock); -} - -/* +/** + * sge_opaque_hdr - * Opaque version of structure the SGE stores at skb->head of TX_DATA packets * and for which we must reserve space. */ @@ -204,9 +218,6 @@ struct sge_opaque_hdr { #define TX_HEADER_LEN \ (sizeof(struct tx_data_wr) + sizeof(struct sge_opaque_hdr)) -void *cxgb3i_alloc_big_mem(unsigned int); -void cxgb3i_free_big_mem(void *); - /* * get and set private ip for iscsi traffic */ -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html