[PATCH v1 09/10] mlx4_fc: Implement fcoe/fcoib offload driver, fcoib initialization protocol driver

From 0b10d95be067595dbb050d3cc2c779372038aec4 Mon Sep 17 00:00:00 2001
From: Vu Pham <vu@xxxxxxxxxxxxxxxxx>
Date: Mon, 16 Aug 2010 14:47:34 -0700
Subject: [PATCH 09/10] mlx4_fc: Implement fcoe/fcoib offload driver, fcoib initialization protocol driver

Implement the FCoE/FCoIB offload driver. The driver utilizes mlx4_device to
completely offload SCSI operations and FC-CRC calculations.

Implement the mlx4_fcoib driver, which uses a FIP-like protocol to discover
BridgeX gateways in the InfiniBand fabric.

Signed-off-by: Oren Duer <oren@xxxxxxxxxxxxxx>
Signed-off-by: Vu Pham <vu@xxxxxxxxxxxx>
---
 drivers/scsi/mlx4_fc/Makefile         |    8 +
 drivers/scsi/mlx4_fc/fcoib.h          |  343 ++++++
 drivers/scsi/mlx4_fc/fcoib_api.h      |   61 +
 drivers/scsi/mlx4_fc/fcoib_discover.c | 1925 +++++++++++++++++++++++++++++++
 drivers/scsi/mlx4_fc/fcoib_main.c     | 1211 ++++++++++++++++++++
 drivers/scsi/mlx4_fc/mfc.c            | 2003 +++++++++++++++++++++++++++++++++
 drivers/scsi/mlx4_fc/mfc.h            |  666 +++++++++++
 drivers/scsi/mlx4_fc/mfc_exch.c       | 1496 ++++++++++++++++++++++++
 drivers/scsi/mlx4_fc/mfc_rfci.c       | 1001 ++++++++++++++++
 drivers/scsi/mlx4_fc/mfc_sysfs.c      |  244 ++++
 10 files changed, 8958 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/mlx4_fc/Makefile
 create mode 100644 drivers/scsi/mlx4_fc/fcoib.h
 create mode 100644 drivers/scsi/mlx4_fc/fcoib_api.h
 create mode 100644 drivers/scsi/mlx4_fc/fcoib_discover.c
 create mode 100644 drivers/scsi/mlx4_fc/fcoib_main.c
 create mode 100644 drivers/scsi/mlx4_fc/mfc.c
 create mode 100644 drivers/scsi/mlx4_fc/mfc.h
 create mode 100644 drivers/scsi/mlx4_fc/mfc_exch.c
 create mode 100644 drivers/scsi/mlx4_fc/mfc_rfci.c
 create mode 100644 drivers/scsi/mlx4_fc/mfc_sysfs.c

diff --git a/drivers/scsi/mlx4_fc/Makefile b/drivers/scsi/mlx4_fc/Makefile
new file mode 100644
index 0000000..9109483
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/Makefile
@@ -0,0 +1,8 @@
+obj-m += mlx4_fc.o
+mlx4_fc-y := mfc.o \
+	mfc_rfci.o \
+	mfc_exch.o \
+	mfc_sysfs.o
+
+obj-m += mlx4_fcoib.o
+mlx4_fcoib-y := fcoib_main.o fcoib_discover.o
diff --git a/drivers/scsi/mlx4_fc/fcoib.h b/drivers/scsi/mlx4_fc/fcoib.h
new file mode 100644
index 0000000..1c94275
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/fcoib.h
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_FCOIB_H
+#define MLX4_FCOIB_H
+
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <net/dst.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/version.h>
+
+struct fip_dev_priv;
+
+/* Extern Variables */
+extern int fip_debug;
+extern struct workqueue_struct *fip_workqueue;
+
+/* definitions */
+#define DRV_NAME  "mlx4_fcoib"
+
+#define	FIP_OP_RECV	(1ul << 31)
+#define FIP_UD_MTU(ib_mtu)	((ib_mtu) - FIP_ENCAP_LEN - FIP_ETH_HEADER_LEN)
+#define FIP_UD_BUF_SIZE(ib_mtu)	((ib_mtu) + IB_GRH_BYTES)
+#define	FIP_MAX_BACKOFF_SECONDS	16
+#define	FIP_MAX_VHBAS_PER_GW	256
+#define FIP_DISCOVER_NUM_MCAST		2
+
+#define VHBAS_BITMASK	(FIP_MAX_VHBAS_PER_GW / 8 / sizeof(unsigned long))
+#define DELAYED_WORK_CLEANUP_JIFFS	2
+
+enum debug_print_level {
+	LOG_PRIO_HIGH = 1,
+	LOG_PRIO_MED = 2,
+	LOG_PRIO_LOW = 3,
+	LOG_PRIO_VERY_LOW = 4
+};
+
+#define fip_printk(level, priv, format, arg...)			\
+	printk(level "mlx4_fcoib: %s:%d: " format,		\
+		((struct fip_dev_priv *) priv)->ca->name,	\
+		((struct fip_dev_priv *) priv)->port, ## arg)
+
+#define fip_warn(priv, format, arg...)				\
+	fip_printk(KERN_WARNING, priv, format , ## arg)
+
+#define fip_dbg(priv, level, format, arg...)			\
+	do {							\
+		if (fip_debug >= (level))			\
+			fip_printk(KERN_WARNING, priv,		\
+				   format , ## arg);		\
+	} while (0)
+
+struct fip_mcast {
+	struct login_ctx *login;
+	char name[ETH_ALEN * 2 + IFNAMSIZ];
+	u8 mac[ETH_ALEN];
+	int vid;
+	union ib_gid gid;
+	u8 rss;
+	struct rb_node rb_node;
+	struct mcast_entry *mcast_data;
+};
+
+struct port_mcast_data {
+	struct list_head multicast_list;
+	struct delayed_work mcast_task;
+	struct mutex mlock;
+	unsigned long flags;
+
+	u8 port;
+	struct ib_pd *pd;
+	union ib_gid local_gid;
+	unsigned int mcast_mtu;
+	int rate;
+	struct ib_device *ca;
+};
+
+enum mcast_join_state {
+	MCAST_FLAG_USED = 0,
+	MCAST_FLAG_SEND = 1,
+	MCAST_FLAG_RECV = 2,
+	MCAST_FLAG_BUSY = 3,
+	MCAST_FLAG_JOINED = 4,
+	MCAST_FLAG_DONE = 5,
+	MCAST_FLAG_ATTACHED = 6,
+	MCAST_FLAG_AH_SET = 7,
+	MCAST_FLAG_REMOVED = 8
+};
+
+enum mcast_join_type {
+	MCAST_SEND_RECEIVE = 0,
+	MCAST_RECEIVE_ONLY = 1,
+	MCAST_SEND_ONLY = 2,
+};
+
+enum {
+	MCAST_TASK_RUN = 1,
+	MCAST_TASK_STOPPED = 2,
+};
+
+struct mcast_entry {
+	struct ib_sa_multicast *sa_mcast;
+	struct ib_sa_mcmember_rec mcmember;
+	struct list_head list;
+	unsigned long flags;
+	struct ib_ah *ah;
+	struct port_mcast_data *port_mcast;
+	atomic_t ref_cnt;
+	int backoff;
+	void (*callback) (struct mcast_entry *, void *context);
+	void *context;
+	struct ib_qp *qp;
+	u32 qkey;
+	u32 pkey;
+};
+
+enum {
+	FIP_ETH_HEADER_LEN = 14,
+	FIP_ENCAP_LEN = 4,
+	FIP_PROTOCOL_RX_SIZE = 64,	/* must be power of 2 */
+	FIP_PROTOCOL_TX_SIZE = 64,	/* must be power of 2 */
+};
+
+enum fip_packet_type {
+	FIP_DISCOVER_UCAST = 0,
+	FIP_DISCOVER_MCAST = 1
+};
+
+struct ring_entry {
+	char *mem;
+	u64 bus_addr;
+	int length;
+};
+
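+/*
+ * Simple circular descriptor ring. The ring size must be a power of 2
+ * so that head and tail can wrap with "& (size - 1)"; the producer
+ * advances head and the consumer advances tail.
+ */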
+struct ring {
+	int size;
+	struct ring_entry *ring;
+	int head;
+	int tail;
+};
+
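+/*
+ * Discovery FSM states, in order of progression:
+ * OFF -> INIT -> SOLICIT (once all mcast joins complete) -> LOGIN.
+ * The ordering matters: the code compares states numerically.
+ */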
+enum fip_discover_state {
+	FIP_DISCOVER_OFF,
+	FIP_DISCOVER_INIT,
+	FIP_DISCOVER_SOLICIT,
+	FIP_DISCOVER_LOGIN
+};
+
+struct fip_discover {
+	spinlock_t lock;
+	struct list_head gw_list;
+	struct list_head gw_rm_list;
+	enum fip_discover_state state;
+	int flush;
+	struct semaphore flush_done;
+	struct ib_cq *cq;
+	struct ib_qp *qp;
+	struct ring rx_ring;
+	struct ring tx_ring;
+
+	u16 pkey;
+	u16 pkey_index;
+	struct delayed_work task;
+	struct delayed_work cleanup_task;
+	struct work_struct pkt_rcv_task;
+	struct work_struct mcast_refresh_task;
+
+	int mcast_dest_mask;
+	struct mcast_entry *mcast[FIP_DISCOVER_NUM_MCAST];
+
+	int backoff_time;
+};
+
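+/*
+ * Per-GW FSM states. The ordering matters: the code compares states
+ * numerically (e.g. a vHBA exists only from FIP_GW_WAITING_FOR_FLOGI
+ * onwards).
+ */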
+enum fip_gw_state {
+	FIP_GW_RESET,
+	FIP_GW_RCVD_UNSOL_AD,
+	FIP_GW_SENT_SOL,
+	FIP_GW_RCVD_SOL_AD,
+	FIP_GW_WAITING_FOR_FLOGI,
+	FIP_GW_SENT_FLOGI,
+	FIP_GW_RCVD_FLOGI_ACCPT,
+};
+
+struct fip_gw_data_info {
+	int flags;
+	u32 gw_qpn;
+	u16 gw_lid;
+	u16 gw_port_id;
+	u16 gw_num_vnics;
+	u8 gw_guid[8];
+	u8 switch_name[8];
+	u8 fabric_name[8];
+	u32 keep_alive_frq;
+	u8 gw_vendor_id[9];
+	u8 priority;
+	u16 pkey;
+	u8 sl;
+};
+
+struct fip_gw_data {
+	int flush;
+	struct fip_dev_priv *priv;
+	struct list_head list;
+	enum fip_gw_state state;
+	struct list_head fip_destroy;
+	struct delayed_work gw_task;
+	struct delayed_work fip_cleanup_task;
+	struct fip_gw_data_info info;
+	struct fip_gw_data_info *new_gw_data;	/* used for GW modification */
+	unsigned long bitmask[VHBAS_BITMASK];
+
+	/* vHBA info - currently supports a single vHBA per GW */
+	u64 fc_handle;
+
+	/* unified timers */
+	unsigned long vhba_ka_tmr;
+	int vhba_ka_tmr_valid;
+	unsigned long gw_ka_tmr;
+	int gw_ka_tmr_valid;
+	unsigned long host_ka_tmr;
+	int host_ka_tmr_valid;
+};
+
+enum fip_gw_data_flags {
+	FIP_IS_FIP = 1,		/* protocol type */
+	FIP_RCV_MULTICAST = 1 << 1,	/* received mcast packet */
+	FIP_GW_AVAILABLE = 1 << 2,	/* GW available bit set in pkt */
+	FIP_HOST_ASSIGNED_VLAN = 1 << 3	/* H bit set in advertise pkt */
+};
+
+struct fip_dev_priv {
+	spinlock_t lock;
+	struct mutex mlock;
+	struct fip_discover discover;
+	struct port_mcast_data mcast;
+
+	struct delayed_work restart_task;
+	struct ib_device *ca;
+	u8 port;
+	u16 pkey;
+	u16 pkey_index;
+	struct ib_pd *pd;
+	struct ib_mr *mr;
+	union ib_gid local_gid;
+	u16 local_lid;
+
+	int max_mtu_enum;
+	unsigned int mtu;
+	unsigned int mcast_mtu;
+	int rate;
+	unsigned int max_ib_mtu;
+	struct ib_event_handler event_handler;
+	struct list_head list;
+
+	int hca_caps;
+
+};
+
+/*
+ * send a single multicast packet.
+ */
+int fip_mcast_send(struct fip_dev_priv *priv, struct ib_qp *qp,
+		   unsigned int wr_id, u64 mapping, int size,
+		   u16 pkey_index, struct mcast_entry *mcast);
+/*
+ * send a single unicast packet.
+ */
+int fip_ucast_send(struct fip_dev_priv *priv, struct ib_qp *qp,
+		   unsigned int wr_id, u64 mapping, int size,
+		   u16 pkey_index, u32 dest_qpn, u16 dlid, u32 qkey);
+
+int fip_init_qp(struct fip_dev_priv *priv, struct ib_qp *qp,
+		u16 pkey_index, u32 qkey);
+int fip_post_receive(struct fip_dev_priv *priv, struct ib_qp *qp, int size,
+		     int id, char *mem, struct ring_entry *mem_entry);
+
+void fip_flush_rings(struct fip_dev_priv *priv, struct ib_cq *cq,
+		     struct ib_qp *qp, struct ring *rx, struct ring *tx);
+void fip_free_rings(struct fip_dev_priv *p, struct ring *rx, struct ring *tx);
+
+int fip_init_tx(struct fip_dev_priv *priv, int size, struct ring *tx_ring);
+int fip_init_rx(struct fip_dev_priv *priv, int size,
+		struct ib_qp *qp, struct ring *rx_ring);
+int fip_comp(struct fip_dev_priv *priv, struct ib_cq *cq,
+	     struct ring *rx_ring, struct ring *tx_ring);
+void fip_discover_comp(struct ib_cq *cq, void *dev_ptr);
+void fip_discover_fsm(struct work_struct *work);
+int fip_discover_rx_packet(struct fip_dev_priv *priv, int index);
+void fip_discover_process_rx(struct work_struct *work);
+
+void fip_discover_mcast_connect_cb(struct mcast_entry *mcast,
+				   void *discover_context);
+struct mcast_entry *fip_mcast_join(struct port_mcast_data *port_mcast,
+				    void *context, const char *mgid, u32 qkey,
+				    u16 pkey, struct ib_qp *qp,
+				    enum mcast_join_type type,
+				    void (*callback) (struct mcast_entry *,
+						      void *context));
+void fip_mcast_free(struct mcast_entry *mcast);
+int fip_mcast_stop_thread(struct port_mcast_data *port_mcast);
+void fip_mcast_join_task(struct work_struct *work);
+
+int fip_free_gw_list(struct fip_dev_priv *priv);
+void fip_refresh_mcasts(struct work_struct *work);
+
+int fip_dev_init(struct fip_dev_priv *priv);
+void fip_dev_cleanup(struct fip_dev_priv *priv);
+int fip_discover_init(struct fip_dev_priv *priv);
+void fip_discover_cleanup(struct fip_dev_priv *priv);
+
+#endif /* MLX4_FCOIB_H */
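
For orientation, here is a minimal sketch of how the mcast helpers above are
intended to be used: join a group with fip_mcast_join() and get notified
through the supplied callback once the join and attach complete. The names
my_join_done and example_join are illustrative only, and the assumption that
fip_mcast_join() returns NULL on failure is not confirmed by this excerpt.

    /* illustrative only -- not part of the patch */
    static void my_join_done(struct mcast_entry *mcast, void *context)
    {
            /* context is the pointer passed to fip_mcast_join(); the group
             * is usable for TX once MCAST_FLAG_AH_SET is set in
             * mcast->flags */
    }

    static int example_join(struct fip_dev_priv *priv, const char *mgid,
                            u32 qkey)
    {
            struct mcast_entry *m;

            m = fip_mcast_join(&priv->mcast, &priv->discover, mgid, qkey,
                               priv->pkey, priv->discover.qp,
                               MCAST_SEND_RECEIVE, my_join_done);
            return m ? 0 : -EINVAL;
    }
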
diff --git a/drivers/scsi/mlx4_fc/fcoib_api.h b/drivers/scsi/mlx4_fc/fcoib_api.h
new file mode 100644
index 0000000..945516b
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/fcoib_api.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FCOIB_API_H
+#define FCOIB_API_H
+
+/* This .h file is used to integrate the mlx4_fc module with
+ * the FCoIB discovery module.
+ *
+ * mlx4_fc will implement these functions.
+ */
+
+struct ib_device;
+
+enum els_over_fip_type {
+	FLOGI_OVER_FIP = 0,
+	LOGO_OVER_FIP = 1,
+};
+
+typedef int (*fcoib_send_els_cb) (u64 gw_discovery_handle, u64 gw_fc_handle,
+				  enum els_over_fip_type type,
+				  u8 *els, u32 host_data_qpn);
+int fcoib_create_vhba(struct ib_device *ib_device, u8 port_num,
+		      unsigned int mtu, u16 gw_lid, u8 sl,
+		      u64 gw_disc_hl, fcoib_send_els_cb send_els_cb,
+		      u64 wwpn, u64 wwnn);
+int fcoib_recvd_flogi_reply(u64 gw_fc_handle, u8 *flogi_reply,
+			    int size, u32 gw_data_qpn);
+void fcoib_destroy_vhba(u64 gw_fc_handle);
+void fcoib_get_vhba_fcid(u64 gw_fc_handle, uint8_t *fcid);
+
+#endif /* FCOIB_API_H */
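
To make the split between the two modules concrete, here is a rough sketch of
how the discovery driver might drive this API. It is illustrative only:
send_els_to_gw and example_create_vhba are hypothetical names, the WWPN/WWNN
values are placeholders, and the gateway handle encoding is an assumption.

    /* illustrative only -- not part of the patch */
    static int send_els_to_gw(u64 gw_discovery_handle, u64 gw_fc_handle,
                              enum els_over_fip_type type,
                              u8 *els, u32 host_data_qpn)
    {
            /* the discovery driver wraps the ELS payload in a FIP frame
             * (FLOGI or LOGO) and sends it to the gateway */
            return 0;
    }

    static int example_create_vhba(struct fip_dev_priv *priv,
                                   struct fip_gw_data *gw)
    {
            /* mlx4_fc creates the vHBA and calls send_els_to_gw() whenever
             * it needs an ELS carried over FIP */
            return fcoib_create_vhba(priv->ca, priv->port, priv->mtu,
                                     gw->info.gw_lid, gw->info.sl,
                                     (u64)(unsigned long)gw, send_els_to_gw,
                                     0 /* wwpn */, 0 /* wwnn */);
    }
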
diff --git a/drivers/scsi/mlx4_fc/fcoib_discover.c b/drivers/scsi/mlx4_fc/fcoib_discover.c
new file mode 100644
index 0000000..ee57d76
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/fcoib_discover.c
@@ -0,0 +1,1925 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <rdma/ib_verbs.h>
+
+#include "fcoib.h"
+#include "fcoib_api.h"
+
+/* string "Mellanox" */
+#define FIP_VENDOR_MELLANOX {0x4d, 0x65, 0x6c, 0x6c, \
+			     0x61, 0x6e, 0x6f, 0x78}
+
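+/*
+ * Note: on a length mismatch this macro returns -EINVAL from the
+ * calling function, so it must only be used where that is safe.
+ */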
+#define FIP_TEST_PKT_LENGTH(length, type)				\
+	if ((length) != sizeof(type) + IB_GRH_BYTES) {			\
+		fip_dbg(priv, LOG_PRIO_LOW, "Dump packet: at=%d"	\
+			" unexpected size. length=%d expected=%d\n",	\
+			__LINE__, (int)length,				\
+			(int)(sizeof(type) + IB_GRH_BYTES));		\
+		return -EINVAL;						\
+	}
+
+struct fip_fcoib_ver {
+	u8 version;
+	u8 reserved[3];
+};
+
+struct fip_fip_type {
+	u8 type;
+	u8 length;
+	u8 reserved[2];
+};
+
+struct fip_fip_header {
+	u16 opcode;
+	u8 reserved;
+	u8 subcode;
+	u16 list_length;
+	u16 flags;
+	struct fip_fip_type type;
+	u8 vendor_id[8];
+};
+
+struct fcoib_solicit {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 _reserved_1;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwPortId;
+	u16 lid;
+	u8 gw_guid[8];
+
+	u8 fip_name_id_type_f;
+	u8 fip_name_id_length_f;
+	u16 _reserved_2;
+	u8 node_name[8];
+
+	u8 max_receive_size_type_f;
+	u8 max_receive_size_length_f;
+	u16 max_fcoe_size;
+};
+
+struct fcoib_advertise {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 fip_priority_type_f;
+	u8 fip_priority_length_f;
+	u8 _reserved_1;
+	u8 priority;
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 _reserved_2;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwportid;
+	u16 lid;
+	u8 gw_guid[8];
+
+	u8 fip_name_identifier_type_f;
+	u8 fip_name_identifier_length_f;
+	u16 _reserved_3;
+	u8 switch_name[8];
+
+	u8 fip_fabric_name_type_f;
+	u8 fip_fabric_name_length_f;
+	u16 _reserved_4;
+	u32 fc_map;
+	u8 fabric_name[8];
+
+	u8 fka_adv_period_type_f;
+	u8 fka_adv_period_length_f;
+	u16 _reserved_5;
+	u32 fka_adv_period;
+
+	u8 partition_type_f;
+	u8 partition_length_f;
+	u16 reserved_6;
+	u8 t10_vendor_id_2[8];
+	u16 reserved_7;
+	u16 pkey;
+};
+
+#define FLOGI_FDISC_REQUEST_SIZE (35 * 4)
+#define FLOGI_FDISC_ACCPT_SIZE (35 * 4)
+#define FLOGI_FDISC_RJCT_SIZE (8 * 4)
+
+struct fcoib_flogi_fdisc_request {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 els_type_f;
+	u8 els_length_f;
+	u16 _reserved_;
+	u8 els[FLOGI_FDISC_REQUEST_SIZE];
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwportid;
+	u16 lid;
+	u8 port_guid[8];
+};
+
+struct fcoib_flogi_fdisc_acc {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 els_type_f;
+	u8 els_length_f;
+	u16 _reserved_;
+	u8 els[FLOGI_FDISC_ACCPT_SIZE];
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwPortId;
+	u16 lid;
+	u8 port_guid[8];
+};
+
+struct fcoib_flogi_fdisc_rjt {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 els_type_f;
+	u8 els_length_f;
+	u16 _reserved_;
+	u8 els[FLOGI_FDISC_RJCT_SIZE];
+};
+
+#define LOGO_REQUEST_SIZE (10 * 4)
+#define LOGO_ACCPT_SIZE (9 * 4)
+#define LOGO_RJCT_SIZE (8 * 4)
+
+struct fcoib_logo_request {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 els_type_f;
+	u8 els_length_f;
+	u16 _reserved_;
+	u8 els[LOGO_REQUEST_SIZE];
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwportid;
+	u16 lid;
+	u8 port_guid[8];
+};
+
+struct fcoib_ioa_alive {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	uint8_t infiniband_address_type_f;
+	uint8_t infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwportid;
+	u16 lid;
+	u8 port_guid[8];
+};
+
+struct fcoib_vhba_alive {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwportid;
+	u16 lid;
+	u8 port_guid[8];
+
+	u8 infiniband_vx_port_id_type_f;
+	u8 infiniband_vx_port_id_length_f;
+	u16 reserved_2;
+	u8 t10_vendor_id_2[8];
+	u32 vn_port_qpn;
+	u8 vn_port_guid[8];
+	u32 vn_port_addres_id;
+	u8 vn_port_name[8];
+};
+
+struct fcoib_clear_virtual_link_ioa {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwPortId;
+	u16 lid;
+	u8 gw_guid[8];
+
+	u8 fip_name_identifier_type_f;
+	u8 fip_name_identifier_length_f;
+	u16 reserved_3;
+	u8 switch_name[8];
+};
+
+struct fcoib_clear_virtual_link_vhba {
+	struct fip_fcoib_ver version;
+	struct fip_fip_header fip;
+
+	u8 infiniband_address_type_f;
+	u8 infiniband_address_length_f;
+	u16 reserved;
+	u8 t10_vendor_id[8];
+	u32 qpn;
+	u16 sl_gwPortId;
+	u16 lid;
+	u8 gw_guid[8];
+
+	u8 fip_name_identifier_type_f;
+	u8 fip_name_identifier_length_f;
+	u16 reserved_3;
+	u8 switch_name[8];
+
+	/* TODO: array of items */
+	u8 infiniband_vx_port_id_type_f;
+	u8 infiniband_vx_port_id_length_f;
+	u16 reserved_2;
+	u8 t10_vendor_id_2[8];
+	u32 vn_port_qpn;
+	u8 vn_port_guid[8];
+	u32 vn_port_addres_id;
+	u8 vn_port_name[8];
+};
+
+enum fip_packet_fields {
+	FCOIB_FIP_OPCODE = 0xFFF8,
+	EOIB_FIP_OPCODE = 0xFFF9,
+	FIP_FIP_HDR_LENGTH = 3,
+	FIP_FIP_HDR_TYPE = 13,
+
+	FIP_HOST_SOL_SUB_OPCODE = 0x1,
+	FIP_GW_ADV_SUB_OPCODE = 0x2,
+	FIP_HOST_LOGIN_SUB_OPCODE = 0x3,
+	FIP_GW_LOGIN_SUB_OPCODE = 0x4,
+	FIP_HOST_LOGOUT_SUB_OPCODE = 0x5,
+	FIP_GW_UPDATE_SUB_OPCODE = 0x6,
+	FIP_GW_TABLE_SUB_OPCODE = 0x7,
+	FIP_HOST_ALIVE_SUB_OPCODE = 0x8,
+
+	FCOIB_HOST_SOL_SUB_OPCODE = 0x1,
+	FCOIB_GW_ADV_SUB_OPCODE = 0x2,
+	FCOIB_LS_REQUEST_SUB_OPCODE = 0x3,
+	FCOIB_LS_REPLY_SUB_OPCODE = 0x4,
+	FCOIB_HOST_ALIVE_SUB_OPCODE = 0x8,
+	FCOIB_CLVL_SUB_OPCODE = 0x9,
+
+	FIP_FIP_FCF_FLAG = 0x1,
+	FIP_FIP_SOLICITED_FLAG = 0x2,
+	FIP_FIP_ADVRTS_FLAG = 0x4,
+	FIP_FIP_FP_FLAG = 0x80,
+	FIP_FIP_SP_FLAG = 0x40,
+
+	FIP_BASIC_LENGTH = 7,
+	FIP_BASIC_TYPE = 240,
+
+	FIP_ADVERTISE_LENGTH_1 = 4,
+	FIP_ADVERTISE_TYPE_1 = 241,
+	FIP_ADVERTISE_HOST_VLANS = 0x80,
+
+	FIP_LOGIN_LENGTH_1 = 13,
+	FIP_LOGIN_TYPE_1 = 242,
+	FIP_LOGIN_LENGTH_2 = 4,
+	FIP_LOGIN_TYPE_2 = 246,
+
+	FIP_LOGIN_V_FLAG = 0x8000,
+	FIP_LOGIN_M_FLAG = 0x4000,
+	FIP_LOGIN_VP_FLAG = 0x2000,
+	FIP_LOGIN_DMAC_MGID_MASK = 0x3F,
+	FIP_LOGIN_RSS_MGID_MASK = 0x0F,
+	FIP_LOGIN_RSS_SHIFT = 4,
+
+	FIP_LOGOUT_LENGTH_1 = 13,
+	FIP_LOGOUT_TYPE_1 = 245,
+
+	FIP_HOST_UPDATE_LENGTH = 13,
+	FIP_HOST_UPDATE_TYPE = 245,
+	FIP_HOST_VP_FLAG = 0x01,
+	FIP_HOST_U_FLAG = 0x80,
+	FIP_HOST_R_FLAG = 0x40,
+
+	FIP_CONTEXT_UP_LENGTH = 9,
+	FIP_CONTEXT_UP_TYPE = 243,
+	FIP_CONTEXT_V_FLAG = 0x80,
+	FIP_CONTEXT_RSS_FLAG = 0x40,
+	FIP_CONTEXT_TYPE_MASK = 0x0F,
+
+	FIP_CONTEXT_TBL_TYPE = 244,
+	FIP_CONTEXT_TBL_SEQ_MASK = 0xC0,
+	FIP_CONTEXT_TBL_SEQ_FIRST = 0x40,
+	FIP_CONTEXT_TBL_SEQ_LAST = 0x80,
+
+	FKA_ADV_PERIOD = 8,
+
+	FIP_PRIORITY_TYPE = 1,
+	FIP_PRIORITY_LENGTH = 1,
+	FIP_MAC_TYPE = 2,
+	FIP_MAC_LENGTH = 2,
+	FIP_FC_MAP_TYPE = 3,
+	FIP_FC_MAP_LENGTH = 2,
+	FIP_NAME_IDENTIFIER_TYPE = 4,
+	FIP_NAME_IDENTIFIER_LENGTH = 3,
+	FIP_FABRIC_NAME_TYPE = 5,
+	FIP_FABRIC_NAME_LENGTH = 4,
+	MAX_RECEIVE_SIZE_TYPE = 6,
+	MAX_RECEIVE_SIZE_LENGTH = 1,
+	FLOGI_TYPE = 7,
+	FLOGI_REQUEST_LENGTH = 36,
+	FLOGI_ACCEPT_LENGTH = 36,
+	FLOGI_REJECT_LENGTH = 9,
+
+	FDISC_TYPE = 8,
+	FDISC_REQUEST_LENGTH = 36,
+	FDISC_ACCEPT_LENGTH = 36,
+	FDISC_REJECT_LENGTH = 9,
+	LOGO_TYPE = 9,
+	LOGO_REQUEST_LENGTH = 11,
+	LOGO_ACCEPT_LENGTH = 10,
+	LOGO_REJECT_LENGTH = 9,
+	VX_PORT_ID_TYPE = 11,
+	VX_PORT_ID_LENGTH = 5,
+	FKA_ADV_PERIOD_TYPE = 12,
+	FKA_ADV_PERIOD_LENGTH = 2,
+	INFINIBAND_ADDRESS_TYPE = 240,
+	INFINIBAND_ADDRESS_LENGTH = 7,
+	EOIB_GW_INFORMATION_TYPE = 241,
+	EOIB_GW_INFORMATION_LENGTH = 4,
+	VNIC_LOGIN_OR_ACK_INFORMATION_TYPE = 242,
+	VNIC_LOGIN_OR_ACK_INFORMATION_LENGTH = 13,
+	VHUB_UPDATE_TYPE = 243,
+	VHUB_UPDATE_LENGTH = 9,
+	VHUB_TABLE_TYPE = 244,
+	VNIC_IDENTITY_TYPE = 245,
+	VNIC_IDENTITY_LENGTH = 13,
+	PARTITION_TYPE = 246,
+	PARTITION_LENGTH = 4,
+	INFINIBAND_VX_PORT_ID_TYPE = 247,
+	INFINIBAND_VX_PORT_ID_LENGTH = 10,
+	BXM_TUNNELED_PACKET_TYPE = 250,
+	BXM_COMMAND_TYPE = 251,
+	FIP_VENDOR_ID_TYPE = 13,
+	FIP_VENDOR_ID_LENGTH = 3,
+};
+
+const char FIP_DISCOVER_MGID[16] = {
+	0xFF, 0x12, 0xFC, 0x1B,
+	0x00, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+const char FIP_SOLICIT_MGID[16] = {
+	0xFF, 0x12, 0xFC, 0x1B,
+	0x00, 0x07, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
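+/* well-known QKey used for all FIP traffic on the discover UD QP */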
+const u32 FCOIB_FIP_QKEY = 0x80020004;
+
+static void fip_gw_fsm(struct work_struct *work);
+static void fip_purge_gws(struct work_struct *work);
+
+static inline int _map_generic_pkt(struct fip_dev_priv *priv,
+				   struct ring_entry *tx_ring_entry,
+				   char *mem, int pkt_size)
+{
+	/* alloc packet to be sent */
+	tx_ring_entry->mem = mem;
+
+	/* map packet to bus */
+	tx_ring_entry->length = pkt_size;
+	tx_ring_entry->bus_addr = ib_dma_map_single(priv->ca,
+						    tx_ring_entry->mem,
+						    pkt_size, DMA_TO_DEVICE);
+
+	if (unlikely(ib_dma_mapping_error(priv->ca, tx_ring_entry->bus_addr))) {
+		fip_warn(priv, "send_generic_pkt failed to map to pci\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static inline int send_generic_mcast_pkt(struct fip_dev_priv *priv,
+					 struct ring *tx_ring,
+					 char *mem, int pkt_size,
+					 struct ib_qp *qp,
+					 int pkey_index,
+					 struct mcast_entry *mcast)
+{
+	int index, ret;
+
+	/*
+	 * the head is only updated at task level, so there is no need to
+	 * take any locks here
+	 */
+	index = tx_ring->head;
+	fip_dbg(priv, LOG_PRIO_LOW, "send mcast packet\n");
+
+	/* it is possible for the AH to be missing in transient
+	 * states (after events) */
+	if (!mcast || !test_bit(MCAST_FLAG_AH_SET, &mcast->flags))
+		return -EBUSY;
+
+	/* ring full try again */
+	if (index == tx_ring->tail) {
+		fip_warn(priv, "send_generic_pkt ring full\n");
+		return -EAGAIN;
+	}
+
+	ret = _map_generic_pkt(priv, &tx_ring->ring[index], mem, pkt_size);
+	if (ret)
+		return ret;
+
+	ret = fip_mcast_send(priv, qp, tx_ring->head,
+			     tx_ring->ring[index].bus_addr,
+			     pkt_size, pkey_index, mcast);
+
+	if (ret) {
+		fip_warn(priv,
+			 "send_generic_mcast_pkt: fip_mcast_send ret=%d\n",
+			 ret);
+		ret = -EINVAL;
+		goto error_unmap_dma;
+	}
+
+	tx_ring->head = (index + 1) & (tx_ring->size - 1);
+
+	return 0;
+
+error_unmap_dma:
+	ib_dma_unmap_single(priv->ca,
+			    tx_ring->ring[index].bus_addr,
+			    pkt_size, DMA_TO_DEVICE);
+	return -ENODEV;
+}
+
+static inline int send_generic_ucast_pkt(struct fip_dev_priv *priv,
+					 struct ring *tx_ring,
+					 char *mem, int pkt_size,
+					 struct ib_qp *qp,
+					 int pkey_index,
+					 u32 dst_qpn, u16 dst_lid, u32 qkey)
+{
+	int index, ret;
+
+	/*
+	 * the head is only updated at task level, so there is no need to
+	 * take any locks here
+	 */
+	index = tx_ring->head;
+
+	fip_dbg(priv, LOG_PRIO_LOW, "send ucast packet\n");
+
+	/* ring full try again */
+	if (index == tx_ring->tail) {
+		fip_warn(priv, "send_generic_pkt ring full\n");
+		return -EAGAIN;
+	}
+
+	ret = _map_generic_pkt(priv, &tx_ring->ring[index], mem, pkt_size);
+	if (ret)
+		return ret;
+
+	ret = fip_ucast_send(priv, qp,
+			     tx_ring->head, tx_ring->ring[index].bus_addr,
+			     pkt_size, pkey_index,
+			     dst_qpn, dst_lid, qkey);
+
+	if (ret) {
+		fip_warn(priv,
+			 "send_generic_ucast_pkt: fip_ucast_send ret=%d\n",
+			 ret);
+		ret = -EINVAL;
+		goto error_unmap_dma;
+	}
+
+	tx_ring->head = (index + 1) & (tx_ring->size - 1);
+
+	return 0;
+
+error_unmap_dma:
+	ib_dma_unmap_single(priv->ca,
+			    tx_ring->ring[index].bus_addr,
+			    pkt_size, DMA_TO_DEVICE);
+	return -ENODEV;
+}
+
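+/*
+ * Packet templates: each send routine below copies one of these
+ * pre-initialized descriptors and then fills in the endian-sensitive
+ * and per-GW fields before posting the buffer.
+ */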
+const struct fcoib_solicit base_fcoib_solicit_pkt = {
+	.fip.subcode = FCOIB_HOST_SOL_SUB_OPCODE,
+	.fip.type.type = FIP_FIP_HDR_TYPE,
+	.fip.type.length = FIP_FIP_HDR_LENGTH,
+	.fip.vendor_id = FIP_VENDOR_MELLANOX,
+
+	.infiniband_address_type_f = INFINIBAND_ADDRESS_TYPE,
+	.infiniband_address_length_f = INFINIBAND_ADDRESS_LENGTH,
+	.t10_vendor_id = "mellanox",
+
+	.fip_name_id_type_f = FIP_NAME_IDENTIFIER_TYPE,
+	.fip_name_id_length_f = FIP_NAME_IDENTIFIER_LENGTH,
+
+	.max_receive_size_type_f = MAX_RECEIVE_SIZE_TYPE,
+	.max_receive_size_length_f = MAX_RECEIVE_SIZE_LENGTH,
+};
+
+struct fcoib_flogi_fdisc_request base_flogi_request_pkt = {
+	.fip.subcode = FCOIB_LS_REQUEST_SUB_OPCODE,
+	.fip.type.type = FIP_FIP_HDR_TYPE,
+	.fip.type.length = FIP_FIP_HDR_LENGTH,
+	.fip.vendor_id = FIP_VENDOR_MELLANOX,
+
+	.els_type_f = FLOGI_TYPE,
+	.els_length_f = FLOGI_REQUEST_LENGTH,
+	.infiniband_address_type_f = INFINIBAND_ADDRESS_TYPE,
+	.infiniband_address_length_f = INFINIBAND_ADDRESS_LENGTH,
+	.t10_vendor_id = "mellanox",
+};
+
+struct fcoib_logo_request base_logo_request_pkt = {
+	.fip.subcode = FCOIB_LS_REQUEST_SUB_OPCODE,
+	.fip.type.type = FIP_FIP_HDR_TYPE,
+	.fip.type.length = FIP_FIP_HDR_LENGTH,
+	.fip.vendor_id = FIP_VENDOR_MELLANOX,
+
+	.els_type_f = LOGO_TYPE,
+	.els_length_f = LOGO_REQUEST_LENGTH,
+	.infiniband_address_type_f = INFINIBAND_ADDRESS_TYPE,
+	.infiniband_address_length_f = INFINIBAND_ADDRESS_LENGTH,
+	.t10_vendor_id = "mellanox",
+};
+
+struct fcoib_ioa_alive base_ioa_alive_pkt = {
+	.fip.subcode = FCOIB_HOST_ALIVE_SUB_OPCODE,
+	.fip.type.type = FIP_FIP_HDR_TYPE,
+	.fip.type.length = FIP_FIP_HDR_LENGTH,
+	.fip.vendor_id = FIP_VENDOR_MELLANOX,
+
+	.infiniband_address_type_f = INFINIBAND_ADDRESS_TYPE,
+	.infiniband_address_length_f = INFINIBAND_ADDRESS_LENGTH,
+	.t10_vendor_id = "mellanox",
+};
+
+struct fcoib_vhba_alive base_vhba_alive_pkt = {
+	.fip.subcode = FCOIB_HOST_ALIVE_SUB_OPCODE,
+	.fip.type.type = FIP_FIP_HDR_TYPE,
+	.fip.type.length = FIP_FIP_HDR_LENGTH,
+	.fip.vendor_id = FIP_VENDOR_MELLANOX,
+
+	.infiniband_address_type_f = INFINIBAND_ADDRESS_TYPE,
+	.infiniband_address_length_f = INFINIBAND_ADDRESS_LENGTH,
+	.t10_vendor_id = "mellanox",
+
+	.infiniband_vx_port_id_type_f = INFINIBAND_VX_PORT_ID_TYPE,
+	.infiniband_vx_port_id_length_f = INFINIBAND_VX_PORT_ID_LENGTH,
+	.t10_vendor_id_2 = "mellanox",
+};
+
+int fcoib_advertise_parse(struct fip_dev_priv *priv,
+			  char *buffer, int length, struct fip_gw_data *data)
+{
+	int desc_length;
+	struct fcoib_advertise *pkt;
+
+	FIP_TEST_PKT_LENGTH(length, struct fcoib_advertise);
+
+	pkt = (struct fcoib_advertise *)(buffer + IB_GRH_BYTES);
+	desc_length = be16_to_cpu(pkt->fip.list_length);
+
+	data->info.flags = (be16_to_cpu(pkt->fip.flags) & FIP_FIP_ADVRTS_FLAG) ?
+	    FIP_GW_AVAILABLE : 0;
+
+	data->info.flags |=
+	    (be16_to_cpu(pkt->fip.flags) & FIP_FIP_SOLICITED_FLAG) ?
+	    0 : FIP_RCV_MULTICAST;
+
+	if (be16_to_cpu(pkt->fip.opcode) == FCOIB_FIP_OPCODE) {
+		if (pkt->fip_priority_type_f != FIP_PRIORITY_TYPE ||
+		    pkt->fip_priority_length_f != FIP_PRIORITY_LENGTH ||
+		    pkt->infiniband_address_type_f != INFINIBAND_ADDRESS_TYPE ||
+		    pkt->infiniband_address_length_f !=
+		    INFINIBAND_ADDRESS_LENGTH ||
+		    pkt->fip_name_identifier_type_f !=
+		    FIP_NAME_IDENTIFIER_TYPE ||
+		    pkt->fip_name_identifier_length_f !=
+		    FIP_NAME_IDENTIFIER_LENGTH ||
+		    pkt->fip_fabric_name_type_f != FIP_FABRIC_NAME_TYPE ||
+		    pkt->fip_fabric_name_length_f != FIP_FABRIC_NAME_LENGTH ||
+		    pkt->fka_adv_period_type_f != FKA_ADV_PERIOD_TYPE ||
+		    pkt->fka_adv_period_length_f != FKA_ADV_PERIOD_LENGTH ||
+		    pkt->partition_type_f != PARTITION_TYPE ||
+		    pkt->partition_length_f != PARTITION_LENGTH) {
+			fip_dbg(priv, LOG_PRIO_LOW,
+				"fcoib_advertise_parse dump packet\n");
+			return -EINVAL;
+		}
+
+		data->info.flags |= FIP_IS_FIP;
+
+		data->info.priority = pkt->priority;
+		data->info.gw_qpn = be32_to_cpu(pkt->qpn);
+		data->info.gw_port_id = be16_to_cpu(pkt->sl_gwportid) & 0xfff;
+		data->info.sl = be16_to_cpu(pkt->sl_gwportid) >> 12;
+		data->info.gw_lid = be16_to_cpu(pkt->lid);
+		memcpy(data->info.gw_guid, pkt->gw_guid,
+		       sizeof(data->info.gw_guid));
+		memcpy(data->info.switch_name, pkt->switch_name,
+		       sizeof(data->info.switch_name));
+
+		memcpy(data->info.fabric_name, pkt->fabric_name,
+		       sizeof(data->info.fabric_name));
+		data->info.keep_alive_frq = be32_to_cpu(pkt->fka_adv_period);
+		data->info.pkey = be16_to_cpu(pkt->pkey);
+
+	} else {
+		fip_dbg(priv, LOG_PRIO_LOW,
+			"fcoib_advertise_parse packet opcode is not "
+			"supported=0x%x\n", (int)be16_to_cpu(pkt->fip.opcode));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int fcoib_solicit_send(struct fip_dev_priv *priv,
+		       enum fip_packet_type multicast, u32 dqpn, u16 dlid)
+{
+	int pkt_size = sizeof(struct fcoib_solicit);
+	struct fip_discover *discover = &priv->discover;
+	int ret;
+	char *mem;
+	struct fcoib_solicit *pkt;
+	int i;
+
+	/* alloc packet to be sent */
+	mem = kzalloc(pkt_size, GFP_KERNEL);
+	if (!mem) {
+		fip_warn(priv, "fcoib_solicit_send malloc failed\n");
+		return -EAGAIN;
+	}
+
+	pkt = (struct fcoib_solicit *)mem;
+	memcpy(pkt, &base_fcoib_solicit_pkt, sizeof(struct fcoib_solicit));
+	pkt->fip.opcode = cpu_to_be16(FCOIB_FIP_OPCODE);
+	pkt->fip.list_length =
+	    cpu_to_be16((sizeof(struct fcoib_solicit) >> 2) - 2);
+	pkt->qpn = cpu_to_be32(discover->qp->qp_num);
+	pkt->lid = cpu_to_be16(priv->local_lid);
+	memcpy(pkt->gw_guid, &priv->local_gid.global.interface_id,
+	       sizeof(pkt->gw_guid));
+
+	for (i = 0; i < 8; i++)
+		pkt->node_name[i] = i;
+
+	pkt->max_fcoe_size = cpu_to_be16(priv->max_ib_mtu);
+
+	fip_dbg(priv, LOG_PRIO_MED, "fcoib_solicit_send creating "
+		"multicast=%d solicit packet\n", multicast);
+
+	if (multicast)
+		ret = send_generic_mcast_pkt(priv, &discover->tx_ring,
+					     mem, pkt_size, discover->qp,
+					     discover->pkey_index,
+					     discover->mcast[1]);
+	else
+		ret = send_generic_ucast_pkt(priv, &discover->tx_ring,
+					     mem, pkt_size, discover->qp,
+					     discover->pkey_index,
+					     dqpn, dlid, FCOIB_FIP_QKEY);
+	if (ret) {
+		fip_warn(priv, "discover_send error ret=%d\n", ret);
+		goto error_free_mem;
+	}
+
+	return 0;
+
+error_free_mem:
+	kfree(mem);
+	return -ENOMEM;
+}
+
+/* flogi is assumed to be 35 * 4 bytes */
+static int fcoib_flogi_request_send(struct fip_dev_priv *priv,
+				    struct fip_gw_data *gw,
+				    u8 *flogi, u32 host_data_qpn)
+{
+	int pkt_size = sizeof(struct fcoib_flogi_fdisc_request);
+	struct fcoib_flogi_fdisc_request *pkt;
+	int ret;
+	char *mem;
+
+	/* alloc packet to be sent */
+	mem = kzalloc(pkt_size, GFP_ATOMIC);
+	if (!mem) {
+		fip_warn(priv, "flogi request send malloc failed\n");
+		return -EAGAIN;
+	}
+
+	pkt = (struct fcoib_flogi_fdisc_request *)mem;
+	memcpy(pkt, &base_flogi_request_pkt,
+	       sizeof(struct fcoib_flogi_fdisc_request));
+
+	memcpy(pkt->els, flogi, sizeof(pkt->els));
+	pkt->fip.opcode = cpu_to_be16(FCOIB_FIP_OPCODE);
+	pkt->fip.list_length = cpu_to_be16((sizeof(struct
+						   fcoib_flogi_fdisc_request) >>
+					    2) - 2);
+	pkt->qpn = cpu_to_be32(host_data_qpn);
+	pkt->sl_gwportid = cpu_to_be16(gw->info.gw_port_id);
+	pkt->lid = cpu_to_be16(priv->local_lid);
+	memcpy(pkt->port_guid, &priv->local_gid.global.interface_id,
+	       sizeof(pkt->port_guid));
+
+	ret = send_generic_ucast_pkt(priv,
+				     &priv->discover.tx_ring,
+				     mem, pkt_size, priv->discover.qp,
+				     priv->pkey_index, gw->info.gw_qpn,
+				     gw->info.gw_lid, FCOIB_FIP_QKEY);
+	if (ret) {
+		fip_warn(priv,
+			 "flogi request send:  fip_ucast_send ret=%d\n", ret);
+		goto error_free_mem;
+	}
+
+	return 0;
+
+error_free_mem:
+	kfree(mem);
+	return -ENOMEM;
+}
+
+static int fcoib_logo_request_send(struct fip_dev_priv *priv,
+				   struct fip_gw_data *gw,
+				   u8 *logo, u32 host_data_qpn)
+{
+	int pkt_size = sizeof(struct fcoib_logo_request);
+	struct fcoib_logo_request *pkt;
+	int ret;
+	char *mem;
+
+	/* alloc packet to be sent */
+	mem = kzalloc(pkt_size, GFP_ATOMIC);
+	if (!mem) {
+		fip_warn(priv, "logo request send malloc failed\n");
+		return -EAGAIN;
+	}
+
+	pkt = (struct fcoib_logo_request *)mem;
+	memcpy(pkt, &base_logo_request_pkt, sizeof(struct fcoib_logo_request));
+
+	memcpy(pkt->els, logo, sizeof(pkt->els));
+	pkt->fip.opcode = cpu_to_be16(FCOIB_FIP_OPCODE);
+	pkt->fip.list_length = cpu_to_be16((sizeof(struct
+						   fcoib_logo_request) >> 2) -
+					   2);
+	pkt->qpn = cpu_to_be32(host_data_qpn);
+	pkt->sl_gwportid = cpu_to_be16(gw->info.gw_port_id);
+	pkt->lid = cpu_to_be16(priv->local_lid);
+	memcpy(pkt->port_guid, &priv->local_gid.global.interface_id,
+	       sizeof(pkt->port_guid));
+
+	ret = send_generic_ucast_pkt(priv,
+				     &priv->discover.tx_ring,
+				     mem, pkt_size, priv->discover.qp,
+				     priv->pkey_index, gw->info.gw_qpn,
+				     gw->info.gw_lid, FCOIB_FIP_QKEY);
+	if (ret) {
+		fip_warn(priv,
+			 "logo request send:  fip_ucast_send ret=%d\n", ret);
+		goto error_free_mem;
+	}
+
+	return 0;
+
+error_free_mem:
+	kfree(mem);
+	return -ENOMEM;
+}
+
+int fcoib_ioa_alive_send(struct fip_dev_priv *priv, struct fip_gw_data *gw)
+{
+	int pkt_size = sizeof(struct fcoib_ioa_alive);
+	struct fcoib_ioa_alive *pkt;
+	int ret;
+	char *mem;
+
+	/* alloc packet to be sent */
+	mem = kzalloc(pkt_size, GFP_KERNEL);
+	if (!mem) {
+		fip_warn(priv, "IOA alive send malloc failed\n");
+		return -EAGAIN;
+	}
+
+	pkt = (struct fcoib_ioa_alive *)mem;
+	memcpy(pkt, &base_ioa_alive_pkt, sizeof(struct fcoib_ioa_alive));
+
+	pkt->fip.opcode = cpu_to_be16(FCOIB_FIP_OPCODE);
+	pkt->fip.list_length = cpu_to_be16(FIP_VENDOR_ID_LENGTH +
+					   INFINIBAND_ADDRESS_LENGTH);
+	pkt->qpn = cpu_to_be32(gw->info.gw_qpn);
+	pkt->sl_gwportid = cpu_to_be16(gw->info.gw_port_id);
+	pkt->lid = cpu_to_be16(priv->local_lid);
+	memcpy(pkt->port_guid, &priv->local_gid.global.interface_id,
+	       sizeof(pkt->port_guid));
+
+	ret = send_generic_ucast_pkt(priv,
+				     &priv->discover.tx_ring,
+				     mem, pkt_size, priv->discover.qp,
+				     priv->pkey_index, gw->info.gw_qpn,
+				     gw->info.gw_lid, FCOIB_FIP_QKEY);
+	if (ret) {
+		fip_warn(priv, "IOA alive send:  fip_ucast_send ret=%d\n", ret);
+		goto error_free_mem;
+	}
+
+	return 0;
+
+error_free_mem:
+	kfree(mem);
+	return -ENOMEM;
+}
+
+int fcoib_vhba_alive_send(struct fip_dev_priv *priv, struct fip_gw_data *gw)
+{
+	int pkt_size = sizeof(struct fcoib_vhba_alive);
+	struct fcoib_vhba_alive *pkt;
+	int ret;
+	char *mem;
+
+	/* alloc packet to be sent */
+	mem = kzalloc(pkt_size, GFP_KERNEL);
+	if (!mem) {
+		fip_warn(priv, "vHBA alive send malloc failed\n");
+		return -EAGAIN;
+	}
+
+	pkt = (struct fcoib_vhba_alive *)mem;
+	memcpy(pkt, &base_vhba_alive_pkt, sizeof(struct fcoib_vhba_alive));
+
+	pkt->fip.opcode = cpu_to_be16(FCOIB_FIP_OPCODE);
+	pkt->fip.list_length = cpu_to_be16(FIP_VENDOR_ID_LENGTH +
+					   INFINIBAND_ADDRESS_LENGTH +
+					   INFINIBAND_VX_PORT_ID_LENGTH);
+	pkt->qpn = cpu_to_be32(gw->info.gw_qpn);
+	pkt->sl_gwportid = cpu_to_be16(gw->info.gw_port_id);
+	pkt->lid = cpu_to_be16(priv->local_lid);
+	memcpy(pkt->port_guid, &priv->local_gid.global.interface_id,
+	       sizeof(pkt->port_guid));
+
+	fcoib_get_vhba_fcid(gw->fc_handle,
+			    (u8 *) (&pkt->vn_port_addres_id) + 1);
+
+	ret = send_generic_ucast_pkt(priv,
+				     &priv->discover.tx_ring,
+				     mem, pkt_size, priv->discover.qp,
+				     priv->pkey_index, gw->info.gw_qpn,
+				     gw->info.gw_lid, FCOIB_FIP_QKEY);
+	if (ret) {
+		fip_warn(priv,
+			 "vHBA alive send:  fip_ucast_send ret=%d\n", ret);
+		goto error_free_mem;
+	}
+
+	return 0;
+
+error_free_mem:
+	kfree(mem);
+	return -ENOMEM;
+}
+
+int fcoib_pkt_parse(struct fip_dev_priv *priv,
+		    char *buffer, int length, int *fip_type)
+{
+	struct fip_fip_header *fip_header;
+	u16 fip_opcode;
+
+	fip_header = (struct fip_fip_header *)(buffer +
+					       IB_GRH_BYTES +
+					       sizeof(struct fip_fcoib_ver));
+
+	fip_opcode = be16_to_cpu(fip_header->opcode);
+
+	if (fip_opcode != FCOIB_FIP_OPCODE) {
+		fip_dbg(priv, LOG_PRIO_LOW, "packet: packet is "
+			"not FCoIB FIP packet\n");
+		*fip_type = 0;
+		return -EINVAL;
+	}
+
+	*fip_type = fip_opcode;
+
+	return fip_header->subcode;
+}
+
+/*
+ * Configure the discover QP. This includes configuring RX and TX,
+ * moving the discover QP to RTS and creating the TX and RX rings.
+ */
+int fip_discover_start_rings(struct fip_dev_priv *priv)
+{
+	int ret;
+	struct fip_discover *discover = &priv->discover;
+
+	spin_lock_init(&discover->lock);
+
+	ret = fip_init_tx(priv, discover->tx_ring.size, &discover->tx_ring);
+	if (ret) {
+		fip_warn(priv, "fip_init_tx failed ret=%d\n", ret);
+		return ret;
+	}
+
+	ret = fip_init_rx(priv, discover->rx_ring.size, discover->qp,
+			  &discover->rx_ring);
+	if (ret) {
+		fip_warn(priv, "fip_init_rx returned %d\n", ret);
+		goto release_queues;
+	}
+
+	return 0;
+
+release_queues:
+	fip_flush_rings(priv, discover->cq, discover->qp,
+			&discover->rx_ring, &discover->tx_ring);
+	fip_free_rings(priv, &discover->rx_ring, &discover->tx_ring);
+	return ret;
+}
+
+/*
+ * This function is the RX packet handler entry point at the thread level
+ * (unlike the completion handler, which runs from interrupt context).
+ * The function calls a handler function and then reallocates the ring
+ * entry for the next receive.
+*/
+void fip_discover_process_rx(struct work_struct *work)
+{
+	struct fip_discover *discover =
+	    container_of(work, struct fip_discover, pkt_rcv_task);
+	struct fip_dev_priv *priv =
+	    container_of(discover, struct fip_dev_priv, discover);
+	int mtu_size = FIP_UD_BUF_SIZE(priv->max_ib_mtu);
+	int ret;
+
+	if (priv->discover.flush == 1)
+		return;
+
+	while (discover->rx_ring.head != discover->rx_ring.tail) {
+		if (discover->rx_ring.ring[discover->rx_ring.tail].length == 0)
+			continue;
+
+		if (discover->state == FIP_DISCOVER_LOGIN) {
+			/* login is the first state we RX packets in */
+			ret = fip_discover_rx_packet(priv,
+						     discover->rx_ring.tail);
+			if (ret)
+				fip_warn(priv, "discover_rx_packet ret=%d\n",
+					 ret);
+		}
+
+		ret = fip_post_receive(priv, discover->qp, mtu_size,
+				       discover->rx_ring.tail,
+				       discover->rx_ring.ring[discover->rx_ring.
+							      tail].mem,
+				       discover->rx_ring.ring +
+				       discover->rx_ring.tail);
+		if (ret)
+			fip_warn(priv, "fip_post_receive ret=%d\n", ret);
+
+		discover->rx_ring.tail++;
+		discover->rx_ring.tail &= (discover->rx_ring.size - 1);
+	}
+	return;
+}
+
+/*
+ * Allocate the discover CQ and QP, configure the QP to RTS,
+ * allocate the RX and TX rings and queue work for the discover
+ * finite state machine code.
+ */
+int fip_discover_init(struct fip_dev_priv *priv)
+{
+	struct ib_device *ca = priv->ca;
+	struct ib_qp_init_attr qp_init_attr;
+	struct fip_discover *discover;
+	int i;
+
+	discover = &priv->discover;
+
+	discover->state = FIP_DISCOVER_INIT;
+	discover->flush = 0;
+	discover->rx_ring.size = FIP_PROTOCOL_RX_SIZE;
+	discover->tx_ring.size = FIP_PROTOCOL_TX_SIZE;
+	discover->pkey = priv->pkey;
+	discover->backoff_time = 1;
+	for (i = 0; i < FIP_DISCOVER_NUM_MCAST; i++)
+		discover->mcast[i] = NULL;
+
+	sema_init(&discover->flush_done, 0);
+
+	INIT_DELAYED_WORK(&discover->task, fip_discover_fsm);
+	INIT_DELAYED_WORK(&discover->cleanup_task, fip_purge_gws);
+	INIT_WORK(&discover->pkt_rcv_task, fip_discover_process_rx);
+	INIT_WORK(&discover->mcast_refresh_task, fip_refresh_mcasts);
+	INIT_LIST_HEAD(&discover->gw_list);
+	INIT_LIST_HEAD(&discover->gw_rm_list);
+
+	discover->cq = ib_create_cq(priv->ca, fip_discover_comp, NULL, priv,
+				    discover->rx_ring.size +
+				    discover->tx_ring.size, 0);
+	if (IS_ERR(discover->cq)) {
+		fip_warn(priv, "%s: failed to create receive CQ\n", ca->name);
+		return -EIO;
+	}
+
+	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+	qp_init_attr.cap.max_send_wr = discover->tx_ring.size;
+	qp_init_attr.cap.max_recv_wr = discover->rx_ring.size;
+	qp_init_attr.cap.max_send_sge = 1;
+	qp_init_attr.cap.max_recv_sge = 1;
+	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.qp_type = IB_QPT_UD;
+	qp_init_attr.send_cq = discover->cq;
+	qp_init_attr.recv_cq = discover->cq;
+
+	discover->qp = ib_create_qp(priv->pd, &qp_init_attr);
+	if (IS_ERR(discover->qp)) {
+		fip_warn(priv, "%s: failed to create QP\n", ca->name);
+		goto error_free_cq;
+	}
+
+	fip_dbg(priv, LOG_PRIO_HIGH, "Local QPN=%d, LID=%d\n",
+		(int)discover->qp->qp_num, (int)priv->local_lid);
+
+	/* TODO - figure out what's going on with the PKEY */
+	if (ib_find_pkey(priv->ca, priv->port, discover->pkey,
+			 &discover->pkey_index)) {
+		fip_warn(priv, "P_Key 0x%04x not found\n", discover->pkey);
+		goto error_free_qp;
+	}
+
+	/* move QP from reset to RTS */
+	if (fip_init_qp(priv, discover->qp, discover->pkey_index,
+			FCOIB_FIP_QKEY)) {
+		fip_warn(priv, "fip_init_qp failed\n");
+		goto error_free_qp;
+	}
+
+	/* init RX+TX rings */
+	if (fip_discover_start_rings(priv)) {
+		fip_warn(priv, "%s: failed to move QP to RTS or "
+			 "allocate queues\n", ca->name);
+		goto error_free_qp;
+	}
+
+	/* enable receiving CQ completions */
+	if (ib_req_notify_cq(discover->cq, IB_CQ_NEXT_COMP))
+		goto error_release_rings;
+
+	/* start discover FSM code */
+	queue_delayed_work(fip_workqueue, &discover->task, 0 * HZ);
+
+	return 0;
+
+error_release_rings:
+	fip_flush_rings(priv, discover->cq, discover->qp,
+			&discover->rx_ring, &discover->tx_ring);
+	fip_free_rings(priv, &discover->rx_ring, &discover->tx_ring);
+error_free_qp:
+	ib_destroy_qp(discover->qp);
+error_free_cq:
+	ib_destroy_cq(discover->cq);
+	return -ENODEV;
+}
+
+/*
+ * free the discover TX and RX rings, QP and CQ.
+*/
+void fip_discover_cleanup(struct fip_dev_priv *priv)
+{
+	if (priv->discover.state == FIP_DISCOVER_OFF)
+		goto cleanup_done;
+
+	/*
+	 * move FSM to flush state and wait for the FSM
+	 * to finish whatever it is doing before we continue
+	 */
+	fip_dbg(priv, LOG_PRIO_LOW, "==>priv->discover.flush = 1\n");
+
+	spin_lock_irq(&priv->discover.lock);
+	priv->discover.flush = 1;
+	spin_unlock_irq(&priv->discover.lock);
+
+	cancel_delayed_work(&priv->discover.task);
+	queue_delayed_work(fip_workqueue, &priv->discover.task, 0);
+	down(&priv->discover.flush_done);
+
+	fip_flush_rings(priv, priv->discover.cq, priv->discover.qp,
+			&priv->discover.rx_ring, &priv->discover.tx_ring);
+	flush_workqueue(fip_workqueue);
+
+	fip_free_rings(priv, &priv->discover.rx_ring, &priv->discover.tx_ring);
+	if (priv->discover.qp)
+		ib_destroy_qp(priv->discover.qp);
+	priv->discover.qp = NULL;
+
+	if (priv->discover.cq)
+		ib_destroy_cq(priv->discover.cq);
+	priv->discover.cq = NULL;
+
+cleanup_done:
+	return;
+}
+
+/*
+ * This function handles completions of both TX and RX
+ * packets. RX packets are unmapped and passed to a thread
+ * for processing. TX packets are unmapped and freed.
+ * Note: this function is called from interrupt context
+ */
+void fip_discover_comp(struct ib_cq *cq, void *dev_ptr)
+{
+	struct fip_dev_priv *priv = dev_ptr;
+
+	spin_lock(&priv->discover.lock);
+	/* handle completions. On RX packets this will call discover_process_rx
+	 * from thread context to continue processing */
+	if (fip_comp(priv, priv->discover.cq, &priv->discover.rx_ring,
+		     &priv->discover.tx_ring)) {
+		if (!priv->discover.flush)
+			queue_work(fip_workqueue, &priv->discover.pkt_rcv_task);
+	}
+	spin_unlock(&priv->discover.lock);
+}
+
+/*
+ * Queue the GW for deletion and trigger a delayed call to the cleanup
+ * function.
+ * Note: this deletion method ensures that all pending GW work requests
+ * are cleared without depending on the calling context.
+*/
+void fip_close_gw(struct fip_gw_data *gw)
+{
+	if (gw->state >= FIP_GW_WAITING_FOR_FLOGI) {
+		if (gw->fc_handle)
+			fcoib_destroy_vhba(gw->fc_handle);
+		else
+			printk(KERN_WARNING "closing gw for nonexistent vhba\n");
+	}
+
+	gw->vhba_ka_tmr_valid = 0;
+	gw->host_ka_tmr_valid = 0;
+	gw->gw_ka_tmr_valid = 0;
+	gw->flush = 1;
+	list_del(&gw->list);
+	list_add(&gw->list, &gw->priv->discover.gw_rm_list);
+	gw->info.gw_num_vnics = 0;
+	cancel_delayed_work(&gw->gw_task);
+
+	queue_delayed_work(fip_workqueue, &gw->priv->discover.cleanup_task,
+			   DELAYED_WORK_CLEANUP_JIFFS);
+}
+
+/*
+ * Free GW resources. This includes destroying the vnics. If the GW can be
+ * totally destroyed (no pending work for the GW and all the vnics have been
+ * destroyed) the GW will be removed from the GW list and its memory
+ * freed. If the GW cannot be closed at this time it will not be freed
+ * and the function will return an error.
+ * In this case the caller needs to call the function again to complete the
+ * operation.
+ * Do not call this function directly; use fip_close_gw instead.
+*/
+static int fip_free_gw(struct fip_dev_priv *priv, struct fip_gw_data *gw)
+{
+	gw->flush = 1;
+	gw->info.gw_num_vnics = 0;
+
+	cancel_delayed_work(&gw->gw_task);
+	if (delayed_work_pending(&gw->gw_task))
+		return -EBUSY;
+
+	fip_dbg(priv, LOG_PRIO_LOW, "fip_free_gw. freeing GW\n");
+	list_del(&gw->list);
+	kfree(gw->new_gw_data);
+	kfree(gw);
+	return 0;
+}
+
+/*
+ * Permanently delete all GWs pending deletion. The function goes over
+ * the list of GWs awaiting deletion and tries to delete them. If the
+ * GW destructor returns an error value (currently busy) the function
+ * will requeue itself for another try.
+ */
+static void fip_purge_gws(struct work_struct *work)
+{
+	struct fip_discover *discover = container_of(work,
+						     struct fip_discover,
+						     cleanup_task.work);
+	struct fip_dev_priv *priv = container_of(discover,
+						 struct fip_dev_priv, discover);
+	struct fip_gw_data *gw, *tmp_gw;
+	int respawn = 0;
+
+	list_for_each_entry_safe(gw, tmp_gw, &discover->gw_rm_list, list) {
+		if (fip_free_gw(priv, gw) == -EBUSY)
+			respawn = 1;
+	}
+
+	if (respawn) {
+		fip_dbg(priv, LOG_PRIO_LOW,
+			"fip_free_gw is busy. respawn purge_gws\n");
+		queue_delayed_work(fip_workqueue, &discover->cleanup_task,
+				   DELAYED_WORK_CLEANUP_JIFFS);
+	}
+}
+
+#define NO_GWS_OPEN(discover) \
+	(list_empty(&(discover)->gw_rm_list) && \
+	list_empty(&(discover)->gw_list))
+
+/*
+ * Go over the GW list and try to close the GWs. It is possible that some
+ * of the GWs have pending work and therefore cannot be closed. We cannot
+ * sleep on this because we might be running in the same context as the one
+ * we are waiting for. To solve this, call the function again if needed.
+ * Returns 0 if all GWs were removed and -EBUSY if one or more are still
+ * open.
+*/
+int fip_free_gw_list(struct fip_dev_priv *priv)
+{
+	struct fip_discover *discover = &priv->discover;
+	struct fip_gw_data *curr_gw, *tmp_gw;
+
+	list_for_each_entry_safe(curr_gw, tmp_gw, &discover->gw_list, list)
+	    fip_close_gw(curr_gw);
+
+	if (!NO_GWS_OPEN(discover)) {
+		fip_dbg(priv, LOG_PRIO_LOW, "fip_free_gw_list discover->"
+			"gw_rm_list %s gw_list %s\n",
+			list_empty(&discover->
+				   gw_rm_list) ? "empty" : "not empty",
+			list_empty(&discover->gw_list) ? "empty" : "not empty");
+		return -EBUSY;
+	}
+
+	cancel_delayed_work(&discover->cleanup_task);
+	if (delayed_work_pending(&discover->cleanup_task)) {
+		fip_dbg(priv, LOG_PRIO_LOW, "fip_free_gw_list waiting for "
+			"pending work on cleanup_task\n");
+		return -EBUSY;
+	}
+
+	fip_dbg(priv, LOG_PRIO_LOW, "fip_free_gw_list"
+		" Done freeing all GW we can go on\n");
+
+	return 0;
+}
+
+/*
+ * Look for a GW in the GW list. The search keys used are the GW LID (unique)
+ * and the GW port_id, assuming that a single GW physical port can advertise
+ * itself more than once.
+*/
+struct fip_gw_data *fip_find_gw_in_list(struct fip_discover *discover,
+					u16 gw_port_id, u16 gw_lid)
+{
+	struct fip_gw_data *curr_gw;
+
+	list_for_each_entry(curr_gw, &discover->gw_list, list) {
+		if (curr_gw->info.gw_lid == gw_lid &&
+		    curr_gw->info.gw_port_id == gw_port_id) {
+			return curr_gw;
+		}
+	}
+	return NULL;
+}
+
+struct fip_gw_data *fip_find_gw_by_guid(struct fip_discover *discover,
+					u16 gw_port_id, u8 *gw_guid)
+{
+	struct fip_gw_data *curr_gw;
+
+	list_for_each_entry(curr_gw, &discover->gw_list, list) {
+		if (curr_gw->info.gw_port_id == gw_port_id &&
+		    !memcmp(curr_gw->info.gw_guid, gw_guid, 8)) {
+			return curr_gw;
+		}
+	}
+	return NULL;
+}
+
+static struct fip_gw_data *fip_discover_create_gw(struct fip_dev_priv *priv)
+{
+	struct fip_gw_data *gw_data;
+
+	gw_data = kmalloc(sizeof(struct fip_gw_data), GFP_KERNEL);
+	if (!gw_data)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_DELAYED_WORK(&gw_data->gw_task, fip_gw_fsm);
+	gw_data->priv = priv;
+	gw_data->flush = 0;
+	memset(gw_data->bitmask, 0, sizeof(gw_data->bitmask));
+	gw_data->host_ka_tmr_valid = 0;
+	gw_data->vhba_ka_tmr_valid = 0;
+	gw_data->gw_ka_tmr_valid = 0;
+
+	return gw_data;
+}
+
+static int fip_discover_rx_advertise(struct fip_dev_priv *priv,
+				     struct fip_gw_data *advertise_data)
+{
+	struct fip_discover *discover = &priv->discover;
+	struct fip_gw_data *gw_data;
+	int update_entry = 0;
+
+	/* see if we received advertise packets from this GW before */
+	gw_data = fip_find_gw_in_list(discover,
+				      advertise_data->info.gw_port_id,
+				      advertise_data->info.gw_lid);
+
+	/*
+	 * If the GW is not found in the GW list, create a new GW structure
+	 * and add it to the GW list. If the GW was found in the list but is
+	 * still in multicast state (based on a received mcast packet) we
+	 * will replace its info with the newer, up-to-date packet.
+	 */
+	if (!gw_data) {
+		gw_data = fip_discover_create_gw(priv);
+		if (IS_ERR(gw_data))
+			return -ENOMEM;
+
+		list_add_tail(&gw_data->list, &discover->gw_list);
+		update_entry = 1;
+	} else {
+		if (gw_data->flush)
+			return 0;
+
+		if (gw_data->state <= FIP_GW_RCVD_UNSOL_AD) {
+			kfree(gw_data->new_gw_data);
+			update_entry = 1;
+		}
+	}
+
+	if (update_entry) {
+		memcpy(&gw_data->info, &advertise_data->info,
+		       sizeof(struct fip_gw_data_info));
+		gw_data->state = FIP_GW_RCVD_UNSOL_AD;
+		gw_data->new_gw_data = NULL;
+	}
+
+	/* if multicast advertisement received */
+	if (advertise_data->info.flags & FIP_RCV_MULTICAST) {
+		gw_data->gw_ka_tmr = jiffies + 3 * FKA_ADV_PERIOD * HZ;
+
+		/* we are beyond accepting mcast advertisement */
+		if (gw_data->state != FIP_GW_RCVD_UNSOL_AD)
+			return 0;
+
+		fip_dbg(priv, LOG_PRIO_VERY_LOW,
+			"Received mcast advertise sending ucast solicit"
+			" to GW qpn=%d lid=%d flags=0x%x\n",
+			gw_data->info.gw_qpn, gw_data->info.gw_lid,
+			gw_data->info.flags);
+	} else {		/* unicast advertisement received */
+		int ack_received =
+		    advertise_data->info.flags & FIP_GW_AVAILABLE;
+
+		fip_dbg(priv, LOG_PRIO_VERY_LOW,
+			"received ucast advertise from GW qpn=%d lid=%d"
+			" flags=0x%x\n",
+			gw_data->info.gw_qpn, gw_data->info.gw_lid,
+			gw_data->info.flags);
+
+		/* if this is first ACK received move to FIP_GW_ACK_RCVD */
+		if (ack_received && gw_data->state == FIP_GW_SENT_SOL)
+			gw_data->state = FIP_GW_RCVD_SOL_AD;
+	}
+
+	/* we will call the GW FSM to handle */
+	cancel_delayed_work(&gw_data->gw_task);
+	fip_gw_fsm(&gw_data->gw_task.work);
+	return 0;
+}
+
+/*
+ * This function handles a single received packet that is expected to be
+ * a GW advertisement or a login ACK packet. The function first parses the
+ * packet and decides what the packet type is and then handles the packet
+ * according to its type. This function runs in task context.
+*/
+int fip_discover_rx_packet(struct fip_dev_priv *priv, int index)
+{
+	struct fip_discover *discover = &priv->discover;
+	union {
+		struct fip_gw_data advertise_data;
+	} pkt_data;
+	char *packet = discover->rx_ring.ring[index].mem;
+	int length = discover->rx_ring.ring[index].length;
+	int ret, pkt_type, fip_type;
+
+	pkt_type = fcoib_pkt_parse(priv, packet, length, &fip_type);
+	if (pkt_type < 0)
+		return 0;
+
+	switch (pkt_type) {
+	case FCOIB_GW_ADV_SUB_OPCODE:
+		ret = fcoib_advertise_parse(priv, packet, length,
+					    &pkt_data.advertise_data);
+		if (!ret) {
+			return fip_discover_rx_advertise(priv, &pkt_data.
+							 advertise_data);
+		}
+		break;
+	case FCOIB_LS_REPLY_SUB_OPCODE:
+		{
+			struct fcoib_flogi_fdisc_acc *rep =
+			    (struct fcoib_flogi_fdisc_acc *)(packet +
+							     IB_GRH_BYTES);
+			struct fip_gw_data *gw;
+
+			/* find the GW that this login belongs to */
+			gw = fip_find_gw_in_list(discover,
+						 be16_to_cpu(rep->sl_gwPortId),
+						 be16_to_cpu(rep->lid));
+			if (!gw)
+				break;
+
+			if (!gw->fc_handle) {
+				printk(KERN_ERR "mlx4_fcoib: NO FC HANDLE\n");
+				break;
+			}
+
+			if (!fcoib_recvd_flogi_reply(gw->fc_handle,
+						     rep->els,
+						     (rep->els_length_f - 1) * 4,
+						     be32_to_cpu(rep->qpn))) {
+				gw->state = FIP_GW_RCVD_FLOGI_ACCPT;
+				cancel_delayed_work(&gw->gw_task);
+				fip_gw_fsm(&gw->gw_task.work);
+			} else {
+				printk(KERN_WARNING
+					"mlx4_fcoib: rejected gw\n");
+				gw->state = FIP_GW_RESET;
+			}
+		}
+		break;
+	case FCOIB_CLVL_SUB_OPCODE:
+		{
+			struct fcoib_clear_virtual_link_ioa *clvl =
+			    (struct fcoib_clear_virtual_link_ioa *)
+			    (packet + IB_GRH_BYTES);
+			struct fip_gw_data *gw;
+#define IOA_CLVL_LIST_LENGTH  (FIP_VENDOR_ID_LENGTH + \
+						  INFINIBAND_ADDRESS_LENGTH + \
+						 FIP_NAME_IDENTIFIER_LENGTH)
+#define VHBA_CLVL_LIST_LENGTH (IOA_CLVL_LIST_LENGTH + \
+					       INFINIBAND_VX_PORT_ID_LENGTH)
+
+			/* we must not look up the gw by its lid - the gw may
+			   send the CLVL precisely because its lid changed */
+
+			gw = fip_find_gw_by_guid(discover,
+						 be16_to_cpu(clvl->sl_gwPortId),
+						 clvl->gw_guid);
+			if (!gw) {
+				printk(KERN_ERR
+					"CLVL for non-existing gw\n");
+				break;
+			}
+
+			/* TODO: distinguish between IOA_CLVL and VHBA_CLVL
+			 * once vhba virtualization is implemented; for now
+			 * we close the gw in both cases because each gw has
+			 * one vhba */
+
+			if (be16_to_cpu(clvl->fip.list_length) >=
+			    IOA_CLVL_LIST_LENGTH)
+				fip_close_gw(gw);
+			else
+				printk(KERN_WARNING
+				       "received CLVL with unexpected size\n");
+		}
+		break;
+	default:
+		printk(KERN_WARNING "received unknown packet\n");
+		break;
+	}
+	return 0;
+}
+
+/*
+ * This function is a callback called upon successful join to a
+ * multicast group. The function checks if we have joined and attached
+ * to all required mcast groups and if so moves the discovery FSM to solicit.
+*/
+void fip_discover_mcast_connect_cb(struct mcast_entry *mcast,
+				   void *discover_context)
+{
+	struct fip_discover *discover = discover_context;
+	struct fip_dev_priv *priv =
+	    container_of(discover, struct fip_dev_priv, discover);
+	int i;
+
+	for (i = 0; i < FIP_DISCOVER_NUM_MCAST; i++)
+		if (mcast == discover->mcast[i])
+			break;
+
+	/*
+	 * if we have not started joining all the mcasts or a join is still in
+	 * progress, return. We will continue only when all joins are done
+	 */
+	for (i = 0; i < FIP_DISCOVER_NUM_MCAST; i++) {
+		if (discover->mcast[i] == NULL ||
+		    !test_bit(MCAST_FLAG_DONE, &discover->mcast[i]->flags))
+			return;
+	}
+
+	/* in the case of a reconnect don't change state or send a solicit
+	 * packet */
+	if (discover->state < FIP_DISCOVER_SOLICIT) {
+		fip_dbg(priv, LOG_PRIO_LOW,
+			"fip_multicast_connected "
+			"moved state to solicit\n");
+		spin_lock_irq(&discover->lock);
+		if (!discover->flush) {
+			/* delay sending the solicit packet by 0-100 msec */
+			int rand_delay = jiffies % 100;	/*get_random_int() */
+			discover->state = FIP_DISCOVER_SOLICIT;
+			cancel_delayed_work(&discover->task);
+			/* This is really (rand_delay / 1000) * HZ */
+			queue_delayed_work(fip_workqueue, &discover->task,
+					   (rand_delay * HZ) / 1000);
+		}
+		spin_unlock_irq(&discover->lock);
+	}
+	fip_dbg(priv, LOG_PRIO_LOW, "discover_mcast_connect_cb done\n");
+}
+
+/*
+ * Try to connect to the relevant mcast groups. If one of the joins fails,
+ * the function should be called again to complete the join process for
+ * the mcast groups that were not joined yet.
+ * Note: A successful return of fip_mcast_join means that the mcast join
+ * started, not that the join completed. Completion of the connection process
+ * is asynchronous and is reported through the supplied callback.
+*/
+int fip_discover_mcast_connect(struct fip_dev_priv *priv)
+{
+	struct fip_discover *discover = &priv->discover;
+
+	fip_dbg(priv, LOG_PRIO_LOW, "discover_mcast_connect\n");
+
+	priv->mcast.flags = 0;
+
+	/* connect to a well-known multicast group */
+	discover->mcast[0] = fip_mcast_join(&priv->mcast, discover,
+					     FIP_DISCOVER_MGID, FCOIB_FIP_QKEY,
+					     priv->discover.pkey,
+					     priv->discover.qp,
+					     MCAST_RECEIVE_ONLY,
+					     fip_discover_mcast_connect_cb);
+	if (!discover->mcast[0]) {
+		fip_warn(priv, "failed to join advertise MCAST groups\n");
+		return -1;
+	}
+
+	discover->mcast[1] = fip_mcast_join(&priv->mcast, discover,
+					     FIP_SOLICIT_MGID, FCOIB_FIP_QKEY,
+					     priv->discover.pkey,
+					     priv->discover.qp, MCAST_SEND_ONLY,
+					     fip_discover_mcast_connect_cb);
+	if (!discover->mcast[1]) {
+		fip_warn(priv, "failed to join solicit MCAST groups\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void fip_discover_mcast_disconnect(struct fip_dev_priv *priv)
+{
+	struct fip_discover *discover = &priv->discover;
+	int i;
+
+	for (i = 0; i < FIP_DISCOVER_NUM_MCAST; i++) {
+		if (discover->mcast[i])
+			fip_mcast_free(discover->mcast[i]);
+		discover->mcast[i] = NULL;
+	}
+}
+
+static int fip_discover_mcast_recnct(struct fip_dev_priv *priv)
+{
+	fip_discover_mcast_disconnect(priv);
+	return fip_discover_mcast_connect(priv);
+}
+
+/*
+ * This function unjoins and rejoins all the mcasts used for a specific port.
+ * This includes the two mcasts used by discovery and the mcasts used by the
+ * vnics attached to the various GWs using the port.
+*/
+void fip_refresh_mcasts(struct work_struct *work)
+{
+	struct fip_discover *discover =
+	    container_of(work, struct fip_discover, mcast_refresh_task);
+	struct fip_dev_priv *priv =
+	    container_of(discover, struct fip_dev_priv, discover);
+
+	if (discover->flush)
+		return;
+
+	fip_dbg(priv, LOG_PRIO_LOW, "discover_refresh_mcast: "
+		"calling discover_mcast_recnct\n");
+	if (fip_discover_mcast_recnct(priv))
+		fip_warn(priv, "discover_refresh_mcast: "
+			 "discover_mcast_recnct failed\n");
+}
+
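+/*
+ * ELS-over-FIP callback handed to fcoib_create_vhba(): the vHBA layer
+ * calls it to send FLOGI/LOGO ELS frames encapsulated in FIP to the GW
+ * that was discovered for this vHBA.
+ */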
+static int fcoib_els_over_fip_cb(u64 gw_discovery_handle,
+				 u64 gw_fc_handle,
+				 enum els_over_fip_type type,
+				 u8 *els, u32 host_data_qpn)
+{
+	struct fip_gw_data *curr_gw;
+	int ret = -EINVAL;
+
+	curr_gw = (struct fip_gw_data *)gw_discovery_handle;
+
+	switch (type) {
+	case FLOGI_OVER_FIP:
+		curr_gw->vhba_ka_tmr_valid = 0;
+		curr_gw->state = FIP_GW_SENT_FLOGI;
+
+		curr_gw->fc_handle = gw_fc_handle;
+		ret = fcoib_flogi_request_send(curr_gw->priv,
+					       curr_gw, els, host_data_qpn);
+		break;
+
+	case LOGO_OVER_FIP:
+		ret = fcoib_logo_request_send(curr_gw->priv,
+					      curr_gw, els, host_data_qpn);
+		break;
+	}
+	return ret;
+}
+
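+/*
+ * Service the per-GW keep-alive timers: send a host (IOA) keep-alive
+ * every FKA_ADV_PERIOD seconds, a vHBA keep-alive every 90 seconds, and
+ * tear the GW down if no advertisement has been seen for
+ * 3 * FKA_ADV_PERIOD seconds.
+ */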
+static void fip_handle_gw_timers(struct fip_gw_data *curr_gw)
+{
+	if (curr_gw->host_ka_tmr_valid &&
+	    time_after_eq(jiffies, curr_gw->host_ka_tmr)) {
+		curr_gw->host_ka_tmr = jiffies + FKA_ADV_PERIOD * HZ;
+		fcoib_ioa_alive_send(curr_gw->priv, curr_gw);
+	}
+
+	if (curr_gw->vhba_ka_tmr_valid &&
+	    time_after_eq(jiffies, curr_gw->vhba_ka_tmr)) {
+		curr_gw->vhba_ka_tmr = jiffies + 90 * HZ;
+		fcoib_vhba_alive_send(curr_gw->priv, curr_gw);
+	}
+
+	if (curr_gw->gw_ka_tmr_valid &&
+	    time_after_eq(jiffies, curr_gw->gw_ka_tmr)) {
+		curr_gw->gw_ka_tmr = jiffies + 3 * FKA_ADV_PERIOD * HZ;
+		printk(KERN_WARNING
+		       "no keep-alives from GW, removing GW\n");
+		fip_close_gw(curr_gw);
+	}
+}
+
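+/*
+ * Derive a 48-bit MAC from the port GUID by dropping the two middle
+ * bytes of the EUI-64. For example (illustrative value only), GUID
+ * 0x0002c9fffe123456 maps to MAC 00:02:c9:12:34:56.
+ */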
+static inline u64 guid_to_mac(u64 guid)
+{
+	return (guid & 0xffffff) | ((guid & 0xffffff0000000000) >> 16);
+}
+
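+/*
+ * Per-GW finite state machine. The normal progression is:
+ * RCVD_UNSOL_AD (mcast advertisement seen) -> SENT_SOL (ucast solicit
+ * sent) -> RCVD_SOL_AD (acked ucast advertisement) -> WAITING_FOR_FLOGI
+ * (vHBA created) -> SENT_FLOGI -> RCVD_FLOGI_ACCPT (connected).
+ * The work item is rearmed on every run and is also kicked directly
+ * from the RX path.
+ */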
+static void fip_gw_fsm(struct work_struct *work)
+{
+	struct fip_gw_data *curr_gw = container_of(work,
+						   struct fip_gw_data,
+						   gw_task.work);
+	int ret;
+	unsigned long next_wakeup = (3 * FKA_ADV_PERIOD * HZ);	/* timeout */
+	unsigned long rand = jiffies % 100;
+	u64 wwn, wwnn, wwpn;
+
+	if (curr_gw->flush)
+		return;
+
+	switch (curr_gw->state) {
+	case FIP_GW_RCVD_UNSOL_AD:
+		fip_dbg(curr_gw->priv, LOG_PRIO_VERY_LOW,
+			"Discover login, gw_mcast_rcv\n");
+		ret = 0;
+		curr_gw->state = FIP_GW_SENT_SOL;
+		ret = fcoib_solicit_send(curr_gw->priv,
+					 FIP_DISCOVER_UCAST,
+					 curr_gw->info.gw_qpn,
+					 curr_gw->info.gw_lid);
+		if (ret)
+			next_wakeup = (rand * HZ) / 250;
+		else
+			next_wakeup = (rand * HZ) / 25;
+		break;
+	case FIP_GW_RCVD_SOL_AD:
+		/* if GW was ACKed */
+		fip_dbg(curr_gw->priv, LOG_PRIO_VERY_LOW,
+			"Discover login, gw_ack_rcv\n");
+		curr_gw->state = FIP_GW_WAITING_FOR_FLOGI;
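+		/*
+		 * Build the FC world-wide names from the GUID-derived MAC:
+		 * WWNN carries a 0x10 prefix byte, WWPN carries 0x20 plus
+		 * the 12-bit GW port id in bits 48-59.
+		 */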
+		wwn =
+		    guid_to_mac(be64_to_cpu
+				(curr_gw->priv->local_gid.global.interface_id));
+		wwnn = wwn | ((u64) 0x10 << 56);
+		wwpn = wwn | ((u64) 0x20 << 56) |
+		    ((u64) (curr_gw->info.gw_port_id & 0xfff) << 48);
+
+		ret = fcoib_create_vhba(curr_gw->priv->ca,
+					curr_gw->priv->port,
+					curr_gw->priv->max_ib_mtu,
+					curr_gw->info.gw_lid,
+					curr_gw->info.sl,
+					(u64) curr_gw,
+					fcoib_els_over_fip_cb, wwpn, wwnn);
+		if (ret) {
+			fip_dbg(curr_gw->priv, LOG_PRIO_VERY_LOW,
+				"discover login: failed create vhba\n");
+			curr_gw->state = FIP_GW_RCVD_SOL_AD;
+			break;
+		}
+		curr_gw->host_ka_tmr = jiffies;
+		curr_gw->host_ka_tmr_valid = 1;
+		curr_gw->gw_ka_tmr = jiffies + FKA_ADV_PERIOD * 3 * HZ;
+		curr_gw->gw_ka_tmr_valid = 1;
+		break;
+	case FIP_GW_RCVD_FLOGI_ACCPT:
+		fip_dbg(curr_gw->priv, LOG_PRIO_LOW,
+			"discover login: GW_CONNECTED!!!\n");
+		next_wakeup = FKA_ADV_PERIOD * HZ;
+		if (!curr_gw->vhba_ka_tmr_valid) {
+			curr_gw->vhba_ka_tmr = jiffies + 90 * HZ;
+			curr_gw->vhba_ka_tmr_valid = 1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	fip_handle_gw_timers(curr_gw);
+
+	/* go to sleep until timeout. We expect to be awakened by
+	 * RX packets and never actually wake up due to the timeout
+	 */
+	if (next_wakeup > FKA_ADV_PERIOD * HZ)
+		next_wakeup = FKA_ADV_PERIOD * HZ;
+
+	cancel_delayed_work(&curr_gw->gw_task);
+	queue_delayed_work(fip_workqueue, &curr_gw->gw_task, next_wakeup);
+}
+
+/*
+ * This is the discover finite state machine that runs the
+ * advertise and solicit packet exchange of the discovery
+ * process.
+ * It is assumed that this function is only called from work queue
+ * task context (for locking)
+ */
+void fip_discover_fsm(struct work_struct *work)
+{
+	struct fip_discover *discover =
+	    container_of(work, struct fip_discover, task.work);
+	struct fip_dev_priv *priv =
+	    container_of(discover, struct fip_dev_priv, discover);
+	int recall_time = -1;
+
+	/* we got a flush request and we have not performed it yet */
+	if (discover->flush && discover->state != FIP_DISCOVER_OFF) {
+		fip_dbg(priv, LOG_PRIO_LOW,
+			"==>discover_fsm switching to OFF\n");
+
+		recall_time = DELAYED_WORK_CLEANUP_JIFFS * 2;
+
+		/* if we failed to remove all GWs we
+		 * will retry to remove them */
+		if (fip_free_gw_list(priv)) {
+			fip_dbg(priv, LOG_PRIO_LOW,
+				"fip_free_gw_list not done, recalling\n");
+			goto recall_fsm;
+		}
+		fip_dbg(priv, LOG_PRIO_LOW, "fip_free_gw_list done\n");
+
+		fip_discover_mcast_disconnect(priv);
+
+		if (fip_mcast_stop_thread(&priv->mcast)) {
+			fip_dbg(priv, LOG_PRIO_LOW, "fip_mcast_stop_thread"
+				" not done, recalling\n");
+			goto recall_fsm;
+		}
+
+		discover->state = FIP_DISCOVER_OFF;
+
+		/* signal the unload to continue */
+		up(&priv->discover.flush_done);
+		return;
+	}
+
+	if (discover->state == FIP_DISCOVER_OFF)
+		return;
+
+	if (!priv->local_lid) {
+		recall_time = 1 * HZ;
+		goto recall_fsm;
+	}
+
+	switch (discover->state) {
+	case FIP_DISCOVER_OFF:
+		return;
+	case FIP_DISCOVER_INIT:
+		fip_dbg(priv, LOG_PRIO_LOW, "DISCOVER_INIT\n");
+		/* in init try and join the discover multicast group
+		 * This is a preliminary request for all other progress */
+		if (fip_discover_mcast_connect(priv)) {
+			fip_warn(priv, "failed to join MCAST groups, "
+				 "will retry\n");
+			/* try again later */
+			recall_time = 1 * HZ;
+		}
+		break;
+
+	case FIP_DISCOVER_SOLICIT:
+		/* future mcast solicitation requests may be inserted here */
+		discover->state = FIP_DISCOVER_LOGIN;
+		discover->backoff_time = -1;
+		break;
+
+	case FIP_DISCOVER_LOGIN:
+		/* do nothing */
+		break;
+
+	default:
+		fip_warn(priv, "discover->state in illegal state %d\n",
+			discover->state);
+		break;
+
+	}
+
+recall_fsm:
+	if (recall_time >= 0)
+		queue_delayed_work(fip_workqueue, &discover->task, recall_time);
+
+	return;
+}
diff --git a/drivers/scsi/mlx4_fc/fcoib_main.c b/drivers/scsi/mlx4_fc/fcoib_main.c
new file mode 100644
index 0000000..393eac7
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/fcoib_main.c
@@ -0,0 +1,1211 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+
+#include <net/dst.h>
+
+#include "fcoib.h"
+
+MODULE_DESCRIPTION("FCoIB Discovery");
+MODULE_LICENSE("Dual BSD/GPL");
+
+int fip_debug = LOG_PRIO_HIGH;
+module_param_named(fip_debug_level, fip_debug, int, 0644);
+MODULE_PARM_DESC(fip_debug_level, "set verbosity level of debug message");
+
+struct workqueue_struct *fip_workqueue;
+struct workqueue_struct *fip_mng_workqueue;
+struct ib_sa_client fip_sa_client;
+
+static inline void fip_wr_pepare(struct fip_dev_priv *priv,
+				 struct ib_send_wr *tx_wr,
+				 struct ib_sge *tx_sge,
+				 unsigned int wr_id, u64 mapping,
+				 int size, u16 pkey_index)
+{
+	memset(tx_wr, 0, sizeof(struct ib_send_wr));
+	tx_wr->num_sge = 1;
+	tx_wr->sg_list = tx_sge;
+	tx_wr->opcode = IB_WR_SEND;
+	tx_wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+	tx_wr->wr.ud.pkey_index = pkey_index;
+	tx_wr->wr_id = wr_id;
+
+	memset(tx_sge, 0, sizeof(struct ib_sge));
+	tx_sge->lkey = priv->mr->lkey;
+	tx_sge->addr = mapping;
+	tx_sge->length = size;
+}
+
+/*
+ * send a single multicast packet.
+ * return 0 on success, other on failure.
+*/
+int fip_mcast_send(struct fip_dev_priv *priv, struct ib_qp *qp,
+		   unsigned int wr_id, u64 mapping,
+		   int size, u16 pkey_index, struct mcast_entry *mcast)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_sge tx_sge;
+	struct ib_send_wr tx_wr;
+	int ret;
+
+	fip_wr_pepare(priv, &tx_wr, &tx_sge, wr_id, mapping, size, pkey_index);
+
+	tx_wr.wr.ud.ah = mcast->ah;
+	tx_wr.wr.ud.remote_qpn = 0xFFFFFFFF;
+	tx_wr.wr.ud.remote_qkey = mcast->qkey;
+
+	ret = ib_post_send(qp, &tx_wr, &bad_wr);
+
+	return ret;
+}
+
+/*
+ * send a single unicast packet.
+ * return 0 on success, other on failure.
+*/
+int fip_ucast_send(struct fip_dev_priv *priv, struct ib_qp *qp,
+		   unsigned int wr_id, u64 mapping,
+		   int size, u16 pkey_index, u32 dest_qpn, u16 dlid, u32 qkey)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_ah *new_ah;
+	struct ib_sge tx_sge;
+	struct ib_send_wr tx_wr;
+	int ret;
+	struct ib_ah_attr ah_attr = {
+		.dlid = dlid,
+		.port_num = priv->port,
+	};
+
+	fip_wr_pepare(priv, &tx_wr, &tx_sge, wr_id, mapping, size, pkey_index);
+
+	new_ah = ib_create_ah(priv->pd, &ah_attr);
+	if (IS_ERR(new_ah))
+		return -1;
+
+	tx_wr.wr.ud.ah = new_ah;
+	tx_wr.wr.ud.remote_qpn = dest_qpn;
+	tx_wr.wr.ud.remote_qkey = qkey;
+
+	ret = ib_post_send(qp, &tx_wr, &bad_wr);
+
+	ib_destroy_ah(new_ah);
+
+	return ret;
+}
+
+/*
+ * This is a general purpose CQ completion function that handles
+ * completions on RX and TX rings. It can serve all users that are
+ * using RX and TX rings.
+ * RX completions are distinguished from TX completions by the MSB, which is
+ * set for RX and clear for TX. For RX, the memory is unmapped from the PCI
+ * device and the head is incremented. For TX the memory is unmapped and then
+ * freed.
+ * The function returns the number of packets received.
+*/
+int fip_comp(struct fip_dev_priv *priv, struct ib_cq *cq,
+	     struct ring *rx_ring, struct ring *tx_ring)
+{
+#define FIP_DISCOVER_WC_COUNT 4
+	struct ib_wc ibwc[FIP_DISCOVER_WC_COUNT];
+	int wrid, n, i;
+	int mtu_size = FIP_UD_BUF_SIZE(priv->max_ib_mtu);
+	int rx_count = 0;
+
+	do {
+		/*
+		 * poll for up to FIP_DISCOVER_WC_COUNT in one request. n
+		 * returns the number of WC actually polled
+		 */
+		n = ib_poll_cq(cq, FIP_DISCOVER_WC_COUNT, ibwc);
+		for (i = 0; i < n; ++i) {
+			/*
+			 * use a mask on the id to decide if this is a receive
+			 * or transmit WC
+			 */
+			if (ibwc[i].wr_id & FIP_OP_RECV) {
+				wrid = ibwc[i].wr_id & ~FIP_OP_RECV;
+
+				ib_dma_unmap_single(priv->ca,
+						    rx_ring->ring[wrid].
+						    bus_addr, mtu_size,
+						    DMA_FROM_DEVICE);
+
+				/* check the receive completion status */
+				if (likely(ibwc[i].status == IB_WC_SUCCESS)) {
+					rx_ring->ring[wrid].length =
+					    ibwc[i].byte_len;
+					rx_ring->head =
+					    (wrid + 1) & (rx_ring->size - 1);
+					rx_count++;
+				} else {
+					rx_ring->ring[wrid].length = 0;
+					kfree(rx_ring->ring[wrid].mem);
+				}
+			} else {	/* TX completion */
+				wrid = ibwc[i].wr_id;
+
+				/* unmap and free transmitted packet */
+				ib_dma_unmap_single(priv->ca,
+						    tx_ring->ring[wrid].
+						    bus_addr, ibwc[i].byte_len,
+						    DMA_TO_DEVICE);
+
+				kfree(tx_ring->ring[wrid].mem);
+				tx_ring->ring[wrid].length = 0;
+				tx_ring->tail = wrid;
+			}
+		}
+	} while (n == FIP_DISCOVER_WC_COUNT);
+
+	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+	return rx_count;
+}
+
+/* configure a newly allocated QP and move it
+ * from RESET->INIT->RTR->RTS
+ */
+int fip_init_qp(struct fip_dev_priv *priv, struct ib_qp *qp,
+		u16 pkey_index, u32 qkey)
+{
+	int ret;
+	struct ib_qp_attr qp_attr;
+	int attr_mask;
+
+	/* TODO - fix this
+	   if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+	   return -1; */
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qkey = qkey;
+	qp_attr.port_num = priv->port;
+	qp_attr.pkey_index = pkey_index;
+	attr_mask = IB_QP_QKEY | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_STATE;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		fip_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
+		fip_warn(priv, "qkey=%d, port_num=%d, priv pkey_index=0x%x,"
+			 " qp pkey_index=0x%x\n", (int)qp_attr.qkey,
+			 (int)qp_attr.port_num, (int)priv->pkey_index,
+			 (int)qp_attr.pkey_index);
+		goto out_fail;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	/* Can't set this in a INIT->RTR transition */
+	attr_mask &= ~IB_QP_PORT;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		fip_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	qp_attr.sq_psn = 0;
+	attr_mask |= IB_QP_SQ_PSN;
+	attr_mask &= ~IB_QP_PKEY_INDEX;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		fip_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	return 0;
+
+out_fail:
+	qp_attr.qp_state = IB_QPS_RESET;
+	if (ib_modify_qp(qp, &qp_attr, IB_QP_STATE))
+		fip_warn(priv, "Failed to modify QP to RESET state\n");
+
+	return ret;
+}
+
+void fip_qp_to_err(struct fip_dev_priv *priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+	struct ib_qp_init_attr qp_init_attr;
+	int timeout = 0;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	if (ib_modify_qp(qp, &qp_attr, IB_QP_STATE))
+		fip_warn(priv, "Failed to modify QP to ERR state\n");
+
+	do {
+		msleep(1 * (timeout != 0));
+		ib_query_qp(qp, &qp_attr, IB_QP_CUR_STATE, &qp_init_attr);
+		timeout++;
+	} while (qp_attr.cur_qp_state != IB_QPS_ERR && timeout < 100);
+
+	WARN_ON(qp_attr.cur_qp_state != IB_QPS_ERR);
+
+	return;
+}
+
+/*
+ * allocate a single buffer, map it and post it to the QP.
+ * The id is used to identify the entry in the receive queue.
+ */
+int fip_post_receive(struct fip_dev_priv *priv,
+		     struct ib_qp *qp,
+		     int size, int id, char *mem, struct ring_entry *mem_entry)
+{
+	struct ib_recv_wr rx_wr, *bad_wr;
+	struct ib_sge rx_sge;
+	int ret;
+
+	if (!mem) {
+		mem_entry->mem = kmalloc(size, GFP_KERNEL);
+		if (unlikely(!mem_entry->mem)) {
+			mem_entry->length = 0;
+			return -ENOMEM;
+		}
+	} else
+		mem_entry->mem = mem;
+
+	mem_entry->length = size;
+	mem_entry->bus_addr = ib_dma_map_single(priv->ca, mem_entry->mem, size,
+						DMA_FROM_DEVICE);
+
+	if (unlikely(ib_dma_mapping_error(priv->ca, mem_entry->bus_addr)))
+		goto error;
+
+	rx_wr.wr_id = id | FIP_OP_RECV;
+	rx_wr.next = NULL;
+	rx_wr.sg_list = &rx_sge;
+	rx_wr.num_sge = 1;
+	rx_sge.addr = mem_entry->bus_addr;
+	rx_sge.length = size;
+	rx_sge.lkey = priv->mr->lkey;
+
+	ret = ib_post_recv(qp, &rx_wr, &bad_wr);
+	if (unlikely(ret)) {
+		fip_warn(priv, "post receive failed for buf %d (%d)\n", id,
+			 ret);
+		goto post_recv_failed;
+	}
+	return 0;
+
+post_recv_failed:
+	ib_dma_unmap_single(priv->ca, rx_sge.addr, size, DMA_FROM_DEVICE);
+
+error:
+	mem_entry->length = 0;
+	kfree(mem_entry->mem);
+	return -EIO;
+}
+
+void fip_flush_rings(struct fip_dev_priv *priv,
+		     struct ib_cq *cq,
+		     struct ib_qp *qp,
+		     struct ring *rx_ring, struct ring *tx_ring)
+{
+	fip_dbg(priv, LOG_PRIO_LOW, "fip_qp_to_err called\n");
+	fip_qp_to_err(priv, qp);
+
+	spin_lock_irq(&priv->discover.lock);
+	fip_comp(priv, cq, rx_ring, tx_ring);
+	spin_unlock_irq(&priv->discover.lock);
+}
+
+void fip_free_rings(struct fip_dev_priv *priv,
+		    struct ring *rx_ring, struct ring *tx_ring)
+{
+	int i;
+
+	for (i = rx_ring->size - 1; i >= 0; i--)
+		if (rx_ring->ring[i].length != 0) {
+			ib_dma_unmap_single(priv->ca,
+					    rx_ring->ring[i].bus_addr,
+					    rx_ring->ring[i].length,
+					    DMA_FROM_DEVICE);
+			kfree(rx_ring->ring[i].mem);
+		}
+	rx_ring->size = 0;
+
+	for (i = tx_ring->size - 1; i >= 0; i--)
+		if (tx_ring->ring[i].length != 0) {
+			ib_dma_unmap_single(priv->ca,
+					    tx_ring->ring[i].bus_addr,
+					    tx_ring->ring[i].length,
+					    DMA_TO_DEVICE);
+			kfree(tx_ring->ring[i].mem);
+		}
+	tx_ring->size = 0;
+
+	fip_dbg(priv, LOG_PRIO_LOW, "==>Done cleaning RX and TX queues\n");
+
+	kfree(rx_ring->ring);
+	rx_ring->ring = NULL;
+	kfree(tx_ring->ring);
+	tx_ring->ring = NULL;
+}
+
+/*
+ * TODO - we can do a nicer job here. stage 2
+ * allocates memory and posts receives
+ */
+int fip_init_rx(struct fip_dev_priv *priv,
+		int ring_size, struct ib_qp *qp, struct ring *rx_ring)
+{
+	int i;
+	int mtu_size = FIP_UD_BUF_SIZE(priv->max_ib_mtu);
+
+	rx_ring->size = ring_size;
+	rx_ring->ring = kmalloc(rx_ring->size * sizeof(struct ring_entry),
+				GFP_KERNEL);
+	if (unlikely(!rx_ring->ring)) {
+		rx_ring->size = 0;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < rx_ring->size; i++) {
+		if (fip_post_receive(priv, qp, mtu_size, i, NULL,
+				     rx_ring->ring + i)) {
+			/* we can not release memory without flushing QP */
+			for (; i < rx_ring->size; ++i) {
+				rx_ring->ring[i].mem = NULL;
+				rx_ring->ring[i].length = 0;
+			}
+			return -EIO;
+		}
+	}
+
+	rx_ring->head = 0;
+	rx_ring->tail = 0;
+
+	return 0;
+}
+
+/*
+ * This function allocates the tx buffers and initializes the head and
+ * tail indexes.
+ */
+int fip_init_tx(struct fip_dev_priv *priv, int size, struct ring *tx_ring)
+{
+	tx_ring->size = size;
+	tx_ring->ring = kzalloc(tx_ring->size * sizeof(struct ring_entry),
+				GFP_KERNEL);
+
+	if (!tx_ring->ring) {
+		fip_warn(priv, "fip_init_tx failed in alloc of tx. size=%d\n",
+			 tx_ring->size);
+		tx_ring->size = 0;
+		return -ENOMEM;
+	}
+
+	tx_ring->head = 0;
+	tx_ring->tail = tx_ring->size - 1;
+	return 0;
+}
+
+/*
+ * Allocate a PD and MR that will be used by all
+ * of the port's IB resources.
+ * Call fip_dev_cleanup to release
+ * the allocated resources.
+ */
+int fip_dev_init(struct fip_dev_priv *priv)
+{
+	struct ib_device *ca = priv->ca;
+
+	priv->pd = ib_alloc_pd(priv->ca);
+	if (IS_ERR(priv->pd)) {
+		fip_warn(priv, "%s: failed to allocate PD\n", ca->name);
+		return -ENODEV;
+	}
+
+	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(priv->mr)) {
+		fip_warn(priv, "%s: ib_get_dma_mr failed\n", ca->name);
+		goto out_free_pd;
+	}
+
+	return 0;
+
+out_free_pd:
+	ib_dealloc_pd(priv->pd);
+	return -ENODEV;
+}
+
+/*
+ * cleanup resources allocated by fip_dev_init
+*/
+void fip_dev_cleanup(struct fip_dev_priv *priv)
+{
+	if (ib_dereg_mr(priv->mr))
+		fip_warn(priv, "ib_dereg_mr failed\n");
+
+	if (ib_dealloc_pd(priv->pd))
+		fip_warn(priv, "ib_dealloc_pd failed\n");
+}
+
+/* triggered by a core IB device event */
+void fip_event(struct ib_event_handler *handler, struct ib_event *record)
+{
+	struct fip_dev_priv *priv =
+	    container_of(handler, struct fip_dev_priv, event_handler);
+
+	if (record->element.port_num != priv->port)
+		return;
+
+	switch (record->event) {
+	case IB_EVENT_SM_CHANGE:
+	case IB_EVENT_CLIENT_REREGISTER:
+	case IB_EVENT_PORT_ACTIVE:	/* link up */
+		/* queue a refresh of the mcast groups to
+		 * restart discovery */
+		queue_work(fip_workqueue, &priv->discover.mcast_refresh_task);
+		fip_dbg(priv, LOG_PRIO_MED, "==> event=%d (CLIENT_REREGISTER,"
+			" SM_CHANGE or PORT_ACTIVE)\n", record->event);
+		break;
+
+	case IB_EVENT_PKEY_CHANGE:
+	case IB_EVENT_DEVICE_FATAL:
+	case IB_EVENT_LID_CHANGE:
+		queue_delayed_work(fip_mng_workqueue,
+				   &priv->restart_task, HZ / 10);
+		fip_dbg(priv, LOG_PRIO_MED,
+			"event=%d (PKEY_CHANGE, DEVICE_FATAL or LID_CHANGE)\n",
+			record->event);
+		break;
+	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_SRQ_ERR:
+	case IB_EVENT_SRQ_LIMIT_REACHED:
+	case IB_EVENT_QP_LAST_WQE_REACHED:
+	default:
+		fip_dbg(priv, LOG_PRIO_MED, "event=%d unhandled\n",
+			record->event);
+		break;
+	}
+}
+
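+/*
+ * Exponential backoff with jitter for mcast join retries: the delay
+ * doubles on every call and is capped at FIP_MAX_BACKOFF_SECONDS, so
+ * starting from backoff = 1 the retry delays are roughly 1s, 2s, 4s, ...
+ * plus up to 100ms of jiffies-derived jitter.
+ */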
+static inline int backoff_delay(struct mcast_entry *mcast)
+{
+	int delay = (mcast->backoff * HZ) + (jiffies % (HZ / 10));
+
+	mcast->backoff *= 2;
+	mcast->backoff = (mcast->backoff > FIP_MAX_BACKOFF_SECONDS) ?
+	    FIP_MAX_BACKOFF_SECONDS : mcast->backoff;
+	return delay;
+}
+
+static struct mcast_entry *mcast_alloc(void)
+{
+	struct mcast_entry *mcast;
+
+	mcast = kzalloc(sizeof *mcast, GFP_KERNEL);
+	if (!mcast)
+		return NULL;
+
+	atomic_set(&mcast->ref_cnt, 0);
+	INIT_LIST_HEAD(&mcast->list);
+	return mcast;
+}
+
+static void mcast_requeue_task(struct port_mcast_data *port_mcast, int delay)
+{
+	mutex_lock(&port_mcast->mlock);
+	if (!test_bit(MCAST_TASK_STOPPED, &port_mcast->flags))
+		queue_delayed_work(fip_workqueue, &port_mcast->mcast_task,
+				   delay);
+	mutex_unlock(&port_mcast->mlock);
+}
+
+/*
+ * This function attaches a QP to a multicast group for receive.
+ * If the mcast is used only for transmit there is no need to call
+ * this function. The function adds the QP to the mcast group so that
+ * the group's traffic is delivered to it. If the attach fails the
+ * function returns an error. Caller must hold the mcast->lock.
+*/
+static int mcast_attach(struct mcast_entry *mcast, struct ib_qp *qp)
+{
+	if (test_bit(MCAST_FLAG_ATTACHED, &mcast->flags))
+		return 0;
+
+	/* attach QP to multicast group */
+	if (ib_attach_mcast(qp, &mcast->mcmember.mgid,
+			    be16_to_cpu(mcast->mcmember.mlid)))
+		goto attach_failed;
+
+	set_bit(MCAST_FLAG_ATTACHED, &mcast->flags);
+	return 0;
+
+attach_failed:
+	printk(KERN_ALERT "mlx4_fcoib: mcast_attach failed\n");
+	return -1;
+}
+
+/*
+ * This function creates an address handle (AH) for a multicast group,
+ * needed for TX only. If the AH was previously created, the existing AH
+ * will be reused and the function will return success. Caller must hold
+ * the mcast->lock.
+*/
+static int mcast_create_ah(struct mcast_entry *mcast)
+{
+	struct port_mcast_data *port_mcast = mcast->port_mcast;
+	struct ib_ah_attr av = {
+		.dlid = be16_to_cpu(mcast->mcmember.mlid),
+		.port_num = port_mcast->port,
+		.sl = mcast->mcmember.sl,
+		.ah_flags = IB_AH_GRH,
+		.static_rate = mcast->mcmember.rate,
+		.grh = {
+			.flow_label = be32_to_cpu(mcast->mcmember.flow_label),
+			.hop_limit = mcast->mcmember.hop_limit,
+			.sgid_index = 0,
+			.traffic_class = mcast->mcmember.traffic_class}
+	};
+
+	if (test_bit(MCAST_FLAG_AH_SET, &mcast->flags))
+		return 0;
+
+	av.grh.dgid = mcast->mcmember.mgid;
+
+	/* create multicast ah that will be used for all
+	 * traffic of this mcast group */
+	mcast->ah = ib_create_ah(port_mcast->pd, &av);
+
+	if (IS_ERR(mcast->ah)) {
+		printk(KERN_ALERT
+		       "mlx4_fcoib: mcast_create_ah, failed to alloc ah\n");
+		mcast->ah = NULL;
+		goto create_ah_failed;
+	}
+
+	set_bit(MCAST_FLAG_AH_SET, &mcast->flags);
+	return 0;
+
+create_ah_failed:
+	return -ENODEV;
+}
+
+/*
+ * Called as the ib_sa_join_multicast callback when the join completes.
+ * Checks that the join was successful and if so attaches the QP and/or
+ * creates the AH, then requeues mcast_task (to maybe add more mcasts).
+ * If the join failed, marks the mcast address as ready for retry and
+ * requeues mcast_task with exponential backoff.
+*/
+static int mcast_join_complete(int status, struct ib_sa_multicast *multicast)
+{
+	struct mcast_entry *mcast = multicast->context;
+
+	/* We trap for port events ourselves. */
+	if (status == -ENETRESET)
+		return 0;
+
+	/* a non-zero status means the join failed */
+	if (status)
+		goto retry_join_mcast;
+
+	mcast->mcmember = multicast->rec;
+
+	set_bit(MCAST_FLAG_JOINED, &mcast->flags);
+
+	if (test_bit(MCAST_FLAG_RECV, &mcast->flags) &&
+	    mcast_attach(mcast, mcast->qp)) {
+		printk(KERN_ALERT "mlx4_fcoib: mcast_attach failed\n");
+		goto retry_join_mcast;
+	}
+
+	if (test_bit(MCAST_FLAG_SEND, &mcast->flags) &&
+	    mcast_create_ah(mcast)) {
+		printk(KERN_ALERT "mlx4_fcoib: mcast_create_ah failed\n");
+		goto unattach_mcast;
+	}
+
+	set_bit(MCAST_FLAG_DONE, &mcast->flags);
+
+	if (mcast->callback)
+		mcast->callback(mcast, mcast->context);
+
+	/* this is to make sure no one uses the context after the
+	 * callback */
+	mcast->context = NULL;
+
+	/* we will queue mcast_task again to process
+	 * other mcast join requests */
+	mcast_requeue_task(mcast->port_mcast, 0);
+	atomic_dec(&mcast->ref_cnt);
+	return 0;
+
+unattach_mcast:
+	if (test_and_clear_bit(MCAST_FLAG_ATTACHED, &mcast->flags)) {
+		ib_detach_mcast(mcast->qp,
+				&mcast->mcmember.mgid, mcast->mcmember.mlid);
+	}
+
+retry_join_mcast:
+	printk(KERN_ALERT "mlx4_fcoib: multicast join failed\n");
+
+	/* Clear the busy flag so we try again */
+	clear_bit(MCAST_FLAG_BUSY, &mcast->flags);
+
+	mcast_requeue_task(mcast->port_mcast, backoff_delay(mcast));
+	atomic_dec(&mcast->ref_cnt);
+	return -1;
+}
+
+/*
+ * Join a multicast group. The mcast GID must already be set in
+ * mcast->mcmember.mgid.
+ * This function should not be called directly because it might fail and
+ * retries are assumed to be conducted by the mcast_task. Instead, add your
+ * multicast to the multicast_list and activate mcast_task.
+*/
+static int _mcast_join(struct port_mcast_data *port_mcast,
+		       struct mcast_entry *mcast, u16 pkey, u32 qkey)
+{
+	struct ib_sa_mcmember_rec rec = {
+		.join_state = 1
+	};
+	ib_sa_comp_mask comp_mask;
+	int ret = 0;
+
+	rec.mgid = mcast->mcmember.mgid;
+	rec.port_gid = port_mcast->local_gid;
+	rec.pkey = cpu_to_be16(pkey);
+
+	comp_mask =
+	    IB_SA_MCMEMBER_REC_MGID |
+	    IB_SA_MCMEMBER_REC_PORT_GID |
+	    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+	/*
+	 * we will attempt to join a multicast group. the reply will be
+	 * through the supplied callback mcast_join_complete.
+	 */
+	set_bit(MCAST_FLAG_BUSY, &mcast->flags);
+	mcast->sa_mcast = ib_sa_join_multicast(&fip_sa_client, port_mcast->ca,
+					       port_mcast->port, &rec,
+					       comp_mask, GFP_KERNEL,
+					       mcast_join_complete, mcast);
+
+	if (IS_ERR(mcast->sa_mcast)) {
+		clear_bit(MCAST_FLAG_BUSY, &mcast->flags);
+		ret = PTR_ERR(mcast->sa_mcast);
+		printk(KERN_ALERT "mlx4_fcoib: ib_sa_join_multicast failed\n");
+
+		/*
+		 * add a delayed call so it will retry
+		 * to join the mcast group later.
+		 */
+		mcast_requeue_task(port_mcast, backoff_delay(mcast));
+	}
+	return ret;
+}
+
+static int mcast_start_thread(struct port_mcast_data *port_mcast)
+{
+	mcast_requeue_task(port_mcast, 0);
+	return 0;
+}
+
+static int mcast_leave(struct mcast_entry *mcast, struct ib_qp *qp)
+{
+	if (test_and_set_bit(MCAST_FLAG_REMOVED, &mcast->flags))
+		return 0;
+
+	if (test_and_clear_bit(MCAST_FLAG_ATTACHED, &mcast->flags))
+		if (ib_detach_mcast(qp,
+				    &mcast->mcmember.mgid,
+				    mcast->mcmember.mlid))
+			printk(KERN_ALERT "mlx4_fcoib: "
+			       "ib_detach_mcast failed\n");
+
+	if (test_and_clear_bit(MCAST_FLAG_AH_SET, &mcast->flags))
+		if (ib_destroy_ah(mcast->ah))
+			printk(KERN_ALERT "mlx4_fcoib: ib_destroy_ah failed\n");
+
+	if (test_and_clear_bit(MCAST_FLAG_BUSY, &mcast->flags))
+		ib_sa_free_multicast(mcast->sa_mcast);
+
+	return 0;
+}
+
+/* free a mcast group. This function might sleep */
+void fip_mcast_free(struct mcast_entry *mcast)
+{
+	int max_wait = 10;
+
+	mutex_lock(&mcast->port_mcast->mlock);
+	list_del(&mcast->list);
+	mutex_unlock(&mcast->port_mcast->mlock);
+
+	while (atomic_read(&mcast->ref_cnt) && max_wait) {
+		msleep(50);
+		max_wait--;
+	}
+
+	if (mcast_leave(mcast, mcast->qp))
+		printk(KERN_ALERT "mlx4_fcoib: fip_mcast_free failed\n");
+
+	kfree(mcast);
+}
+
+/*
+ * Stop the mcast task. If the work cannot be stopped at the moment
+ * because it is pending or running, the function returns an error
+ * (and needs to be called again)
+ */
+int fip_mcast_stop_thread(struct port_mcast_data *port_mcast)
+{
+	mutex_lock(&port_mcast->mlock);
+	set_bit(MCAST_TASK_STOPPED, &port_mcast->flags);
+	cancel_delayed_work(&port_mcast->mcast_task);
+	mutex_unlock(&port_mcast->mlock);
+
+	if (delayed_work_pending(&port_mcast->mcast_task))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * This function tries to join all the multicast groups that
+ * are currently present in port_mcast->multicast_list. The code
+ * goes over the list sequentially and tries to join a single
+ * group per call. Mcast groups that are already being processed
+ * are disregarded.
+ * To join an mcast group call fip_mcast_join. Do not call this
+ * function directly.
+*/
+void fip_mcast_join_task(struct work_struct *work)
+{
+	struct port_mcast_data *port_mcast =
+	    container_of(work, struct port_mcast_data, mcast_task.work);
+	int found = 0;
+
+	/* if multicast task is disabled return */
+	if (test_bit(MCAST_TASK_STOPPED, &port_mcast->flags))
+		return;
+
+	while (1) {
+		struct mcast_entry *mcast = NULL;
+
+		mutex_lock(&port_mcast->mlock);
+		list_for_each_entry(mcast, &port_mcast->multicast_list, list) {
+			if (!test_bit(MCAST_FLAG_BUSY, &mcast->flags) &&
+			    !test_bit(MCAST_FLAG_JOINED, &mcast->flags) &&
+			    !test_bit(MCAST_FLAG_REMOVED, &mcast->flags)) {
+				/* Found the next unjoined group */
+				found = 1;
+				atomic_inc(&mcast->ref_cnt);
+				break;
+			}
+		}
+		mutex_unlock(&port_mcast->mlock);
+
+		if (!found)
+			break;
+
+		if (_mcast_join(port_mcast, mcast, mcast->pkey, mcast->qkey))
+			atomic_dec(&mcast->ref_cnt);
+
+		break;
+	}
+}
+
+/*
+ * Join a new mcast address. The function receives a callback function to
+ * call upon completion of the join operation. Be mindful that
+ * a successful return of the function does not mean the mcast is joined.
+ */
+struct mcast_entry *fip_mcast_join(struct port_mcast_data *port_mcast,
+				    void *context, const char *mgid, u32 qkey,
+				    u16 pkey, struct ib_qp *qp,
+				    enum mcast_join_type type,
+				    void (*callback) (struct mcast_entry *,
+						      void *context))
+{
+	struct mcast_entry *mcast;
+
+	/* alloc a new mcast address */
+	mcast = mcast_alloc();
+	if (!mcast) {
+		printk(KERN_ALERT "mlx4_fcoib: "
+		       "fip_mcast_connect: mcast alloc failed\n");
+		goto mcast_connect_exit;
+	}
+
+	mcast->port_mcast = port_mcast;
+	mcast->callback = callback;
+	mcast->qkey = qkey;
+	mcast->pkey = pkey;
+	mcast->context = context;
+	mcast->qp = qp;
+	mcast->backoff = 1;
+
+	if (type != MCAST_SEND_ONLY)
+		set_bit(MCAST_FLAG_RECV, &mcast->flags);
+	if (type != MCAST_RECEIVE_ONLY)
+		set_bit(MCAST_FLAG_SEND, &mcast->flags);
+
+	memcpy(mcast->mcmember.mgid.raw, mgid, sizeof(union ib_gid));
+
+	mutex_lock(&port_mcast->mlock);
+	list_add_tail(&mcast->list, &port_mcast->multicast_list);
+	mutex_unlock(&port_mcast->mlock);
+
+	mcast_start_thread(port_mcast);
+
+	return mcast;
+
+mcast_connect_exit:
+	return NULL;
+}
+
+static void fip_add_one(struct ib_device *device);
+static void fip_remove_one(struct ib_device *device);
+
+static struct ib_client fip_client = {
+	.name = "fip",
+	.add = fip_add_one,
+	.remove = fip_remove_one
+};
+
+/*
+ * query the port for a few of its properties like:
+ * LID, MTU, device capabilities, and GID. This function
+ * does not allocate any resources requiring cleanup.
+*/
+static int fip_query_port_caps(struct fip_dev_priv *priv, u8 port)
+{
+	struct ib_device_attr *device_attr;
+	struct ib_port_attr attr;
+	int result = -ENOMEM;
+
+	/* set max MTU */
+	if (!ib_query_port(priv->ca, port, &attr)) {
+		priv->local_lid = attr.lid;
+		priv->max_mtu_enum = attr.max_mtu;
+		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+	} else {
+		fip_warn(priv, "%s: ib_query_port %d failed\n",
+			 priv->ca->name, port);
+		goto device_query_failed;
+	}
+
+	if (attr.phys_state == 3)	/* port disabled */
+		goto device_query_failed;
+
+	/* MTU will be reset when mcast join happens */
+	priv->mtu = FIP_UD_MTU(priv->max_ib_mtu);
+	priv->mcast_mtu = priv->mtu;
+	/* rate in Gb/sec = speed * width * 2.5 Gb/sec (speed is 1,2,4) */
+	priv->rate = ((int)attr.active_speed *
+		      ib_width_enum_to_int(attr.active_width) * 25) / 10;
+
+	result = ib_query_pkey(priv->ca, port, 0, &priv->pkey);
+	if (result) {
+		fip_warn(priv, "%s: ib_query_pkey port %d failed"
+			 " (ret = %d)\n", priv->ca->name, port, result);
+		goto device_query_failed;
+	}
+
+	device_attr = kmalloc(sizeof(*device_attr), GFP_KERNEL);
+	if (!device_attr) {
+		fip_warn(priv, "%s: allocation of %zu bytes failed\n",
+			 priv->ca->name, sizeof(*device_attr));
+		goto device_query_failed;
+	}
+
+	result = ib_query_device(priv->ca, device_attr);
+	if (result) {
+		fip_warn(priv, "%s: ib_query_device failed (ret = %d)\n",
+			 priv->ca->name, result);
+		kfree(device_attr);
+		goto device_query_failed;
+	}
+	priv->hca_caps = device_attr->device_cap_flags;
+
+	kfree(device_attr);
+
+	/*
+	 * Set the full membership bit, so that we join the right
+	 * broadcast group, etc.
+	 */
+	priv->pkey |= 0x8000;
+
+	result = ib_query_gid(priv->ca, port, 0, &priv->local_gid);
+	if (result) {
+		fip_warn(priv, "%s: ib_query_gid port %d failed (ret = %d)"
+			 "\n", priv->ca->name, port, result);
+		goto device_query_failed;
+	}
+
+	return 0;
+
+device_query_failed:
+	return result;
+}
+
+static void fip_remove_port(struct fip_dev_priv *priv)
+{
+	ib_unregister_event_handler(&priv->event_handler);
+
+	mutex_lock(&priv->mlock);
+	fip_discover_cleanup(priv);
+	fip_dev_cleanup(priv);
+	mutex_unlock(&priv->mlock);
+}
+
+void fip_discover_restart(struct work_struct *work)
+{
+	struct fip_dev_priv *priv =
+	    container_of(work, struct fip_dev_priv, restart_task.work);
+	int result;
+
+	mutex_lock(&priv->mlock);
+	fip_discover_cleanup(priv);
+
+	/* config MTU, GID, HW offload caps etc */
+	if (fip_query_port_caps(priv, priv->port)) {
+		fip_warn(priv, "fip_query_port failed\n");
+		goto err_query_port;
+	}
+
+	/*
+	 * open discover QP and move it to RTS. Alloc RX+TX rings and
+	 * call the discover queue work for the discover finite state machine
+	 */
+	result = fip_discover_init(priv);
+	if (result != 0) {
+		fip_warn(priv, "Failed to alloc discover resources "
+			 "ret=%d\n", result);
+	}
+
+err_query_port:
+	mutex_unlock(&priv->mlock);
+	return;
+}
+
+static void init_port_mcast(struct fip_dev_priv *priv,
+			    struct port_mcast_data *mcast)
+{
+	mcast->flags = 0;
+	INIT_DELAYED_WORK(&mcast->mcast_task, fip_mcast_join_task);
+	INIT_LIST_HEAD(&mcast->multicast_list);
+	mutex_init(&mcast->mlock);
+	mcast->port = priv->port;
+	mcast->ca = priv->ca;
+	mcast->local_gid = priv->local_gid;
+	mcast->mcast_mtu = priv->max_mtu_enum;
+	mcast->pd = priv->pd;
+	mcast->rate = priv->rate;
+}
+
+static struct fip_dev_priv *fip_add_port(const char *format,
+					 struct ib_device *hca, u8 port)
+{
+	struct fip_dev_priv *priv;
+	int result = -ENOMEM;
+
+	priv = kzalloc(sizeof(struct fip_dev_priv), GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	/* init priv data structure vars */
+	priv->ca = hca;
+	priv->port = port;
+
+	/* config MTU, GID, HW offload caps etc */
+	if (fip_query_port_caps(priv, port)) {
+		fip_warn(priv, "fip_query_port failed\n");
+		goto device_init_failed;
+	}
+
+	INIT_DELAYED_WORK(&priv->restart_task, fip_discover_restart);
+	spin_lock_init(&priv->lock);
+	mutex_init(&priv->mlock);
+
+	/* create MR, PD, ... */
+	result = fip_dev_init(priv);
+	if (result != 0) {
+		fip_warn(priv, "Failed to alloc device resources ret=%d\n",
+			 result);
+		goto device_init_failed;
+	}
+
+	init_port_mcast(priv, &priv->mcast);
+
+	/*
+	 * open discover QP and move it to RTS. Alloc RX+TX rings and
+	 * call the discover queue work for the discover finite state machine
+	 */
+	result = fip_discover_init(priv);
+	if (result != 0) {
+		fip_warn(priv, "Failed to alloc discover resources "
+			 "ret=%d\n", result);
+		goto discover_init_failed;
+	}
+
+	/*
+	 * TODO - fix event handler
+	 * register callbacks for core events like change in LID, PKEY,...
+	 */
+	INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, fip_event);
+	result = ib_register_event_handler(&priv->event_handler);
+	if (result != 0) {
+		fip_warn(priv, "%s: ib_register_event_handler failed for "
+			 "port %d (ret = %d)\n", hca->name, port, result);
+		goto event_failed;
+	}
+
+	return priv;
+
+event_failed:
+	fip_discover_cleanup(priv);
+discover_init_failed:
+	fip_dev_cleanup(priv);
+device_init_failed:
+	kfree(priv);
+	return ERR_PTR(result);
+}
+
+static void fip_add_one(struct ib_device *device)
+{
+	struct list_head *dev_list;
+	struct fip_dev_priv *priv;
+	int s, e, p;
+
+	/* only handle IB transport devices */
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
+	if (!dev_list)
+		return;
+
+	INIT_LIST_HEAD(dev_list);
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
+		s = 0;
+		e = 0;
+	} else {
+		s = 1;
+		e = device->phys_port_cnt;
+	}
+
+	for (p = s; p <= e; ++p) {
+		priv = fip_add_port("ib%d", device, p);
+		if (!IS_ERR(priv)) {
+			/*priv = netdev_priv(dev); */
+			list_add_tail(&priv->list, dev_list);
+		}
+	}
+
+	ib_set_client_data(device, &fip_client, dev_list);
+}
+
+static void fip_remove_one(struct ib_device *device)
+{
+	struct fip_dev_priv *priv, *tmp;
+	struct list_head *dev_list;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev_list = ib_get_client_data(device, &fip_client);
+	if (!dev_list) {
+		printk(KERN_WARNING "dev_list is NULL on %s\n", device->name);
+		return;
+	}
+
+	/* flush_workqueue(fip_workqueue); */
+
+	list_for_each_entry_safe(priv, tmp, dev_list, list) {
+		fip_remove_port(priv);
+		list_del(&priv->list);
+		kfree(priv);
+	}
+
+	kfree(dev_list);
+}
+
+static int __init fip_init_module(void)
+{
+	int ret;
+
+	fip_workqueue = create_singlethread_workqueue("fip");
+	if (!fip_workqueue) {
+		ret = -ENOMEM;
+		goto err_workqueue;
+	}
+
+	fip_mng_workqueue = create_singlethread_workqueue("fip_create");
+	if (!fip_mng_workqueue) {
+		ret = -ENOMEM;
+		goto err_mng_workqueue;
+	}
+
+	ib_sa_register_client(&fip_sa_client);
+
+	ret = ib_register_client(&fip_client);
+	if (ret)
+		goto err_sa;
+
+	return 0;
+
+err_sa:
+	ib_sa_unregister_client(&fip_sa_client);
+	destroy_workqueue(fip_mng_workqueue);
+err_mng_workqueue:
+	destroy_workqueue(fip_workqueue);
+err_workqueue:
+	return ret;
+}
+
+static void __exit fip_cleanup_module(void)
+{
+	ib_unregister_client(&fip_client);
+	ib_sa_unregister_client(&fip_sa_client);
+	destroy_workqueue(fip_mng_workqueue);
+	destroy_workqueue(fip_workqueue);
+}
+
+module_init(fip_init_module);
+module_exit(fip_cleanup_module);
diff --git a/drivers/scsi/mlx4_fc/mfc.c b/drivers/scsi/mlx4_fc/mfc.c
new file mode 100644
index 0000000..74f6062
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/mfc.c
@@ -0,0 +1,2003 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+#include <linux/kthread.h>
+#include <linux/rtnetlink.h>
+
+#include <scsi/fc/fc_fip.h>
+
+#include "mfc.h"
+
+#define DRV_NAME	"mlnx_fc"
+#define PFX		DRV_NAME ": "
+#define DRV_VERSION	"1.1"
+#define DRV_RELDATE	"Feb 2010"
+
+MODULE_AUTHOR("Oren Duer/Vu Pham");
+MODULE_DESCRIPTION("Mellanox CX FCoE/FCoIB driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+int mfc_debug_mode;
+module_param_named(debug_mode, mfc_debug_mode, int, 0644);
+MODULE_PARM_DESC(debug_mode,
+		 "0 = None (default), 1 = use gw_mac as dest and burnt"
+		 " MAC as src.");
+
+char *gateway_mac;
+module_param_named(gw_mac, gateway_mac, charp, 0644);
+MODULE_PARM_DESC(gw_mac,
+		 "GW MAC. Used for Debug Mode 1. Format: XX:XX:XX:XX:XX:XX");
+u8 gw_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
+
+int mfc_payload_size = MLX4_DEFAULT_FC_MTU;
+module_param_named(frame_size, mfc_payload_size, int, 0444);
+MODULE_PARM_DESC(frame_size,
+		 "Frame payload size, default is "
+		 __stringify(MLX4_DEFAULT_FC_MTU));
+
+int mfc_num_reserved_xids = MLX4_DEFAULT_NUM_RESERVED_XIDS;
+module_param_named(num_reserved_xids, mfc_num_reserved_xids, int, 0444);
+MODULE_PARM_DESC(num_reserved_xids,
+		 "Max outstanding RFCI exchanges per virtual HBA. "
+		 "Default =  " __stringify(MLX4_DEFAULT_NUM_RESERVED_XIDS));
+
+int mfc_log_exch_per_vhba = MLX4_DEFAULT_LOG_EXCH_PER_VHBA;
+module_param_named(log_exch_per_vhba, mfc_log_exch_per_vhba, int, 0444);
+MODULE_PARM_DESC(log_exch_per_vhba,
+		 "Max outstanding FC exchanges per virtual HBA (log). "
+		 "Default =  " __stringify(MLX4_DEFAULT_LOG_EXCH_PER_VHBA));
+
+int max_vhba_per_port = MLX4_DEFAULT_MAX_VHBA_PER_PORT;
+module_param_named(max_vhba_per_port, max_vhba_per_port, int, 0444);
+MODULE_PARM_DESC(max_vhba_per_port, "Max vHBAs allowed per port. "
+		 "Default =  " __stringify(MLX4_DEFAULT_MAX_VHBA_PER_PORT));
+
+int max_cmd_per_lun = MFC_MAX_CMD_PER_LUN;
+module_param_named(cmd_per_lun, max_cmd_per_lun, int, 0444);
+MODULE_PARM_DESC(cmd_per_lun,
+		 "Max outstanding scsi commands can queue per lun. "
+		 "Default =  " __stringify(MFC_MAX_CMD_PER_LUN));
+
+int mfc_t11_mode = 1;
+static int mfc_dev_idx;
+
+LIST_HEAD(mfc_dev_list);
+DEFINE_SPINLOCK(mfc_dev_list_lock);
+
+struct scsi_transport_template *mfc_transport_template;
+
+static void mfc_link_work(struct work_struct *work);
+static int mfc_lld_reset(struct fc_lport *lp);
+static void mfc_lport_cleanup(struct fc_lport *lp);
+static void mfc_lport_abort_io(struct fc_lport *lp);
+static int mfc_abort(struct scsi_cmnd *cmd);
+static int mfc_device_reset(struct scsi_cmnd *cmd);
+static int mfc_host_reset(struct scsi_cmnd *cmd);
+static struct fc_seq *mfc_elsct_send(struct fc_lport *lport, u32 did,
+				     struct fc_frame *fp, unsigned int op,
+				     void (*resp) (struct fc_seq *,
+						   struct fc_frame *,
+						   void *), void *arg,
+						   u32 timeout);
+
+struct libfc_function_template mlx4_libfc_fcn_templ = {
+	.frame_send = mfc_frame_send,
+	.fcp_cleanup = mfc_lport_cleanup,
+	.fcp_abort_io = mfc_lport_abort_io,
+	.elsct_send = mfc_elsct_send,
+};
+
+struct scsi_host_template mfc_driver_template = {
+	.module = THIS_MODULE,
+	.name = "Mellanox CX2 FCoE/FCoIB driver",
+	.proc_name = DRV_NAME,
+	.queuecommand = mfc_queuecommand,
+	.slave_alloc = fc_slave_alloc,
+	.change_queue_depth = fc_change_queue_depth,
+	.this_id = -1,
+	.cmd_per_lun = MFC_MAX_CMD_PER_LUN,
+	.use_clustering = ENABLE_CLUSTERING,
+	.sg_tablesize = SG_ALL,
+	.max_sectors = MFC_MAX_FMR_PAGES,
+	.eh_abort_handler = mfc_abort,
+	.eh_device_reset_handler = mfc_device_reset,
+	.eh_host_reset_handler = mfc_host_reset,
+};
+
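+/*
+ * Initialize a generic descriptor queue. Queue sizes are expected to be
+ * powers of two so that (counter & size_mask) gives cheap producer/consumer
+ * wrap-around; the optional info array holds per-entry driver context.
+ */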
+int mfc_q_init(struct mfc_queue *q, u16 stride, size_t size, size_t info_size)
+{
+	q->prod = 0;
+	q->cons = 0xffffffff;
+	q->stride = stride;
+	q->size = size;
+	q->size_mask = q->size - 1;
+	q->info = NULL;
+
+	if (info_size) {
+		q->info = vmalloc(q->size * info_size);
+		if (!q->info)
+			return -ENOMEM;
+		memset(q->info, 0, q->size * info_size);
+	}
+
+	spin_lock_init(&q->lock);
+
+	return 0;
+}
+
+void mfc_q_destroy(struct mfc_queue *q)
+{
+	if (!q->info)
+		return;
+
+	vfree(q->info);
+	q->info = NULL;
+}
+
+void mfc_stamp_q(struct mfc_queue *q)
+{
+	__be32 *p;
+	int i;
+
+	/* stamp first dword of every 64 byte */
+	for (i = 0; i < q->size; ++i) {
+		p = q->buf + i * q->stride;
+		*p = cpu_to_be32(1U << 31);
+	}
+
+}
+
+static void mfc_arm_cq(struct mfc_cq *cq)
+{
+	mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT,
+		    cq->vhba->mfc_port->mfc_dev->uar_map,
+		    MLX4_GET_DOORBELL_LOCK(&cq->vhba->mfc_port->mfc_dev->
+					   uar_lock));
+}
+
+static void mfc_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
+{
+	printk(KERN_ERR PFX "CQ event = 0x%x\n", (unsigned int)event);
+}
+
+void mfc_cq_clean(struct mfc_cq *cq)
+{
+	struct mlx4_cq *mcq = &cq->mcq;
+	struct mfc_vhba *vhba = cq->vhba;
+	struct mlx4_cqe *cqe, cqe2;
+
+	cqe = (struct mlx4_cqe *)cq->buf + (mcq->cons_index & cq->size_mask);
+
+	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
+		    mcq->cons_index & cq->size)) {
+		cqe2 = *cqe;
+		mcq->cons_index++;
+		mlx4_cq_set_ci(mcq);
+
+		if (cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)
+			cq->comp_tx(vhba, &cqe2);
+		else
+			cq->comp_rx(vhba, &cqe2);
+
+		cqe =
+		    (struct mlx4_cqe *)cq->buf +
+		    (mcq->cons_index & cq->size_mask);
+	}
+}
+
+static void mfc_cq_comp(struct mlx4_cq *mcq)
+{
+	struct mfc_cq *cq = container_of(mcq, struct mfc_cq, mcq);
+
+	mfc_cq_clean(cq);
+	mfc_arm_cq(cq);
+}
+
+int mfc_create_cq(struct mfc_vhba *vhba, struct mfc_cq *cq,
+		  int entries, int eqidx, int arm, comp_fn comp_rx,
+		  comp_fn comp_tx, char *name)
+{
+	struct mfc_port *mfc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = mfc_port->mfc_dev;
+	int err;
+
+	cq->vhba = vhba;
+	cq->comp_rx = comp_rx;
+	cq->comp_tx = comp_tx;
+	strncpy(cq->name, name, sizeof(cq->name));
+	cq->name[sizeof(cq->name) - 1] = '\0';
+
+	cq->size = roundup_pow_of_two(entries + 1);
+	cq->size_mask = cq->size - 1;
+	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+
+	err = mlx4_alloc_hwq_res(mfc_dev->dev, &cq->wqres, cq->buf_size,
+				 cq->buf_size);
+	if (err)
+		return err;
+
+	cq->mcq.set_ci_db = cq->wqres.db.db;
+	cq->mcq.arm_db = cq->wqres.db.db + 1;
+	*cq->mcq.set_ci_db = 0;
+	*cq->mcq.arm_db = 0;
+
+	cq->buf = (struct mfc_cqe *)cq->wqres.buf.direct.buf;
+
+	err = mlx4_cq_alloc(mfc_dev->dev, cq->size, &cq->wqres.mtt,
+			    &mfc_dev->priv_uar, cq->wqres.db.dma, &cq->mcq,
+			    eqidx, 0);
+	if (err)
+		goto err_man;
+
+	cq->mcq.comp = mfc_cq_comp;
+	cq->mcq.event = mfc_cq_event;
+
+	if (arm)
+		mfc_arm_cq(cq);
+
+	return 0;
+
+err_man:
+	mlx4_free_hwq_res(mfc_dev->dev, &cq->wqres, cq->buf_size);
+	return err;
+}
+
+void mfc_destroy_cq(struct mfc_cq *cq)
+{
+	struct mfc_dev *mfc_dev = cq->vhba->mfc_port->mfc_dev;
+
+	mlx4_cq_free(mfc_dev->dev, &cq->mcq);
+	mlx4_free_hwq_res(mfc_dev->dev, &cq->wqres, cq->buf_size);
+	cq->buf_size = 0;
+	cq->buf = NULL;
+}
+
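+/*
+ * Map a receive buffer for DMA and fill the next free RQ descriptor
+ * with its address, length and memory key. Returns the ring index used,
+ * or -1 if the RQ is full or the DMA mapping failed.
+ */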
+int mfc_post_rx_buf(struct mfc_dev *mfc_dev, struct mfc_qp *fc_qp,
+		    void *buf, size_t buf_size)
+{
+	struct mfc_queue *rq = &fc_qp->rq;
+	struct mfc_rx_desc *rx_desc;
+	dma_addr_t dma;
+	int index;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	if ((u32) (rq->prod - rq->cons) == rq->size) {
+		dev_err(mfc_dev->dma_dev,
+			"RFCI rq is full: prod 0x%x, cons 0x%x, size: 0x%x\n",
+			rq->prod, rq->cons, rq->size);
+		spin_unlock_irqrestore(&rq->lock, flags);
+		return -1;
+	}
+	index = rq->prod & rq->size_mask;
+	++rq->prod;
+	spin_unlock_irqrestore(&rq->lock, flags);
+
+	dma =
+	    pci_map_single(mfc_dev->dev->pdev, buf, buf_size,
+			   PCI_DMA_FROMDEVICE);
+	if (pci_dma_mapping_error(mfc_dev->dev->pdev, dma)) {
+		dev_err(mfc_dev->dma_dev, "Failed to pci_map_single\n");
+		return -1;
+	}
+
+	rx_desc = rq->buf + (index * rq->stride);
+	rx_desc->data[0].count = cpu_to_be32(buf_size);
+	rx_desc->data[0].mem_type = cpu_to_be32(mfc_dev->mr.key);
+	rx_desc->data[0].addr = cpu_to_be64(dma);
+
+	return index;
+}
+
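+/*
+ * Convert an MPT table index into the memory key format the HCA
+ * expects: the top byte of the index is rotated to the low byte of the
+ * key. For example (illustrative value only), 0x01234567 -> 0x23456701.
+ */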
+static u32 hw_index_to_key(u32 ind)
+{
+	return (ind >> 24) | (ind << 8);
+}
+
+static u64 mac_to_u64(u8 *mac)
+{
+	int i;
+	u64 ret = 0;
+
+	for (i = 0; i < 6; i++) {
+		ret <<= 8;
+		ret |= mac[i];
+	}
+	return ret;
+}
+
+static void u64_to_mac(u8 mac[6], u64 u64mac)
+{
+	int i;
+
+	for (i = 5; i >= 0; i--) {
+		mac[i] = u64mac & 0xff;
+		u64mac >>= 8;
+	}
+}
+
+static void mfc_update_src_mac(struct fc_lport *lp, u8 *addr)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+
+	memcpy(vhba->rfci[RFCI_DATA].mac, addr, ETH_ALEN);
+}
+
+static u8 *mfc_get_src_addr(struct fc_lport *lp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+
+	return vhba->rfci[RFCI_DATA].mac;
+}
+
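+/*
+ * Issue the CONFIG_FC firmware command with the basic per-port FC
+ * parameters: FEXCH base and counts, base MPT key, RFCI QP range and
+ * the default promiscuous/multicast QPNs, laid out at the offsets
+ * defined below.
+ */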
+static int mlx4_CONFIG_FC_BASIC(struct mlx4_dev *dev, u8 port,
+				struct mfc_basic_config_params *params)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int err;
+
+#define CONFIG_FC_FEXCH_BASE_OFFSET	0x0
+#define CONFIG_FC_NM_OFFSET		0x5
+#define CONFIG_FC_NV_OFFSET		0x6
+#define CONFIG_FC_NP_OFFSET		0x7
+#define CONFIG_FC_BASEMPT_OFFSET	0x8
+#define CONFIG_FC_NUM_RFCI_OFFSET	0xc
+#define CONFIG_FC_RFCI_BASE_OFFSET	0xd
+#define CONFIG_FC_PROMISC_QPN_OFFSET	0x14
+#define CONFIG_FC_MCAST_QPN_OFFSET	0x18
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memset(mailbox->buf, 0, 256);
+
+	MLX4_PUT(mailbox->buf, params->fexch_base, CONFIG_FC_FEXCH_BASE_OFFSET);
+	MLX4_PUT(mailbox->buf, params->nm, CONFIG_FC_NM_OFFSET);
+	MLX4_PUT(mailbox->buf, params->nv, CONFIG_FC_NV_OFFSET);
+	MLX4_PUT(mailbox->buf, params->np, CONFIG_FC_NP_OFFSET);
+	MLX4_PUT(mailbox->buf, (hw_index_to_key(params->fexch_base_mpt)),
+		 CONFIG_FC_BASEMPT_OFFSET);
+	MLX4_PUT(mailbox->buf,
+		 params->rfci_base | (((u32) params->log_num_rfci) << 24),
+		 CONFIG_FC_NUM_RFCI_OFFSET);
+	MLX4_PUT(mailbox->buf, params->def_fcoe_promisc_qpn,
+		 CONFIG_FC_PROMISC_QPN_OFFSET);
+	MLX4_PUT(mailbox->buf, params->def_fcoe_mcast_qpn,
+		 CONFIG_FC_MCAST_QPN_OFFSET);
+
+	err = mlx4_cmd(dev, mailbox->dma,
+		       MLX4_CMD_INMOD_BASIC_CONF | port,
+		       MLX4_CMD_MOD_FC_ENABLE,
+		       MLX4_CMD_CONFIG_FC, MLX4_CMD_TIME_CLASS_B);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+
+}
+
+static int mlx4_CONFIG_FC_NPORT_ID(struct mlx4_dev *dev, u8 port,
+				   struct nport_id *npid)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int err = 0;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memcpy(mailbox->buf, npid, MFC_NUM_NPORT_IDS * sizeof(u32));
+
+	err = mlx4_cmd(dev, mailbox->dma,
+		       MLX4_CMD_INMOD_NPORT_TAB | port,
+		       MLX4_CMD_MOD_FC_ENABLE,
+		       MLX4_CMD_CONFIG_FC, MLX4_CMD_TIME_CLASS_B);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
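+/*
+ * Called once FLOGI/FDISC has been accepted: brings up the data RFCI
+ * (FCoE only), programs the granted N_Port ID into the per-port table
+ * in hardware and initializes the FCMD queue pair.
+ */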
+int mfc_flogi_finished(struct fc_lport *lp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+	struct mfc_port *fc_port = vhba->mfc_port;
+	int err = 0;
+
+	/* start data RFCI in FCoE mode */
+	if (vhba->net_type == NET_ETH && !mfc_debug_mode) {
+		err = mfc_start_rfci_data(vhba,
+					  mac_to_u64(vhba->rfci[RFCI_DATA].mac));
+		if (err) {
+			dev_err(fc_port->mfc_dev->dma_dev,
+				"port%d vhba%d fail to start DATA RFCI %d\n",
+				fc_port->port, vhba->idx, err);
+			goto err;
+		}
+	}
+
+	if ((vhba->idx < 0) || (vhba->idx >= MFC_NUM_NPORT_IDS)) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	memcpy(&fc_port->npid_table[vhba->idx], &vhba->my_npid,
+	       sizeof(vhba->my_npid));
+
+	err = mlx4_CONFIG_FC_NPORT_ID(fc_port->mfc_dev->dev, fc_port->port,
+				      fc_port->npid_table);
+	if (err) {
+		dev_err(fc_port->mfc_dev->dma_dev,
+			"port%d vhba%d: Couldn't cfg npid %x:%x:%x to idx %d\n",
+			fc_port->port, vhba->idx, vhba->my_npid.fid[0],
+			vhba->my_npid.fid[1], vhba->my_npid.fid[2], vhba->idx);
+		goto err;
+	}
+
+	dev_info(fc_port->mfc_dev->dma_dev,
+		 "FLOGI finished NPort ID %02x:%02x:%02x, idx=%d\n",
+		 vhba->my_npid.fid[0], vhba->my_npid.fid[1],
+		 vhba->my_npid.fid[2], vhba->idx);
+
+	err = mfc_init_fcmd(vhba);
+	if (err)
+		dev_err(fc_port->mfc_dev->dma_dev,
+			"port%d vhba%d: Could not init FCMD, err=%d\n",
+			fc_port->port, vhba->idx, err);
+err:
+	return err;
+}
+
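+/*
+ * Wrapper around the saved libfc rport_login callback; on the first
+ * login after FLOGI completes it records the granted N_Port ID and
+ * finishes the hardware FLOGI setup before calling into libfc.
+ */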
+static int mlx4_rport_login(struct fc_rport_priv *rdata)
+{
+	struct fc_lport *lport = rdata->local_port;
+	struct mfc_vhba *vhba = lport_priv(lport);
+
+	dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+		 "FLOGI finished. fid: %06x\n", fc_host_port_id(lport->host));
+
+	if (!vhba->flogi_finished) {
+		vhba->flogi_finished++;
+		vhba->my_npid.reserved = 0;
+		vhba->my_npid.fid[0] =
+		    (fc_host_port_id(lport->host) >> 16) & 0xff;
+		vhba->my_npid.fid[1] =
+		    (fc_host_port_id(lport->host) >> 8) & 0xff;
+		vhba->my_npid.fid[2] = fc_host_port_id(lport->host) & 0xff;
+		mfc_flogi_finished(lport);
+		vhba->flogi_progress = 0;
+	}
+
+	return vhba->fc_rport_login(rdata);
+}
+
+static void mfc_lport_destroy(struct fc_lport *lp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+
+	fc_lport_free_stats(lp);
+
+	if (vhba->emp) {
+		fc_exch_mgr_free(lp);
+		vhba->emp = NULL;
+	}
+}
+
+static int mfc_lport_config(struct fc_lport *lp)
+{
+	lp->link_up = 0;
+	lp->qfull = 0;
+	lp->max_retry_count = 3;
+	lp->max_rport_retry_count = 3;
+	lp->e_d_tov = 2 * 1000;
+	lp->r_a_tov = 2 * 2 * 1000;
+	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
+			      FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL);
+	lp->link_supported_speeds |= FC_PORTSPEED_1GBIT | FC_PORTSPEED_4GBIT |
+	    FC_PORTSPEED_10GBIT | FC_PORTSPEED_8GBIT | FC_PORTSPEED_16GBIT;
+	lp->link_speed = FC_PORTSPEED_10GBIT;
+
+	if (fc_lport_init_stats(lp))
+		goto err_out;
+
+	fc_lport_config(lp);
+
+	/* offload related configuration */
+	lp->crc_offload = 0;
+	lp->seq_offload = 0;
+	lp->lro_enabled = 0;
+	lp->lro_xid = 0;
+	lp->lso_max = 0;
+
+	return 0;
+
+err_out:
+
+	return -ENOMEM;
+}
+
+static void mfc_lport_cleanup(struct fc_lport *lp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+
+	dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+		 "port%d vhba%d: lport lld_cleanup\n",
+		 vhba->mfc_port->port, vhba->idx);
+
+	vhba->need_reset = 1;
+	mfc_lld_reset(lp);
+}
+
+static void mfc_lport_abort_io(struct fc_lport *lp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+
+	dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+		 "port%d vhba%d: lport lld_abort_io\n",
+		 vhba->mfc_port->port, vhba->idx);
+}
+
+static int mlx4_fip_recv(struct sk_buff *skb, struct net_device *dev,
+			 struct packet_type *ptype, struct net_device *orig_dev)
+{
+	struct mfc_vhba *vhba =
+	    container_of(ptype, struct mfc_vhba, fip_packet_type);
+	struct ethhdr *eh = eth_hdr(skb);
+
+	/* Remember the gateway source MAC before handing the skb to
+	 * fcoe_ctlr_recv(), which may consume and free it.
+	 */
+	memcpy(vhba->dest_addr, eh->h_source, ETH_ALEN);
+
+	fcoe_ctlr_recv(&vhba->ctlr, skb);
+
+	return 0;
+}
+
+static void mlx4_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
+{
+	skb->dev = (struct net_device *)mlx4_from_ctlr(fip)->underdev;
+	dev_queue_xmit(skb);
+}
+
+static int mlx4_fip_ctrl_start(struct mfc_vhba *vhba)
+{
+	struct net_device *netdev = (struct net_device *)vhba->underdev;
+
+	/* Setup lport private data to point to fcoe softc */
+	vhba->ctlr.lp = vhba->lp;
+
+	/* setup Source Mac Address */
+	if (!vhba->ctlr.spma)
+		memcpy(vhba->ctlr.ctl_src_addr, netdev->dev_addr,
+		       netdev->addr_len);
+
+	dev_mc_add(netdev, FIP_ALL_ENODE_MACS);
+
+	vhba->fip_packet_type.func = mlx4_fip_recv;
+	vhba->fip_packet_type.type = htons(ETH_P_FIP);
+	vhba->fip_packet_type.dev = netdev;
+	dev_add_pack(&vhba->fip_packet_type);
+
+	return 0;
+}
+
+int mlx4_fip_ctrl_stop(struct mfc_vhba *vhba)
+{
+	dev_remove_pack(&vhba->fip_packet_type);
+	fcoe_ctlr_link_down(&vhba->ctlr);
+	fcoe_ctlr_destroy(&vhba->ctlr);
+
+	return 0;
+}
+
+static void mfc_libfc_destroy(struct fc_lport *lp)
+{
+	fc_remove_host(lp->host);
+	scsi_remove_host(lp->host);
+	fc_lport_destroy(lp);
+}
+
+static void mfc_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
+{
+	struct fcoe_ctlr *fip = arg;
+	struct fc_exch *exch = fc_seq_exch(seq);
+	struct fc_lport *lport = exch->lp;
+	struct mfc_vhba *vhba = lport_priv(lport);
+	u8 *mac;
+
+	if (IS_ERR(fp))
+		goto done;
+
+	mac = fr_cb(fp)->granted_mac;
+	if (is_zero_ether_addr(mac) && vhba->net_type == NET_ETH) {
+		/* pre-FIP */
+		if (fcoe_ctlr_recv_flogi(fip, lport, fp)) {
+			fc_frame_free(fp);
+			return;
+		}
+	}
+
+	mfc_update_src_mac(lport, mac);
+done:
+	fc_lport_flogi_resp(seq, fp, lport);
+}
+
+static void mfc_logo_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
+{
+	struct fc_lport *lport = arg;
+	static u8 zero_mac[ETH_ALEN] = { 0 };
+
+	if (!IS_ERR(fp))
+		mfc_update_src_mac(lport, zero_mac);
+	fc_lport_logo_resp(seq, fp, lport);
+}
+
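+/*
+ * Intercept FLOGI/FDISC and fabric LOGO so their responses update the
+ * FCoE controller and the source MAC; everything else goes straight to
+ * fc_elsct_send().
+ */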
+static struct fc_seq *mfc_elsct_send(struct fc_lport *lport, u32 did,
+				     struct fc_frame *fp, unsigned int op,
+				     void (*resp) (struct fc_seq *,
+						   struct fc_frame *,
+						   void *), void *arg,
+						   u32 timeout)
+{
+	struct mfc_vhba *vhba = lport_priv(lport);
+	struct fcoe_ctlr *fip = &vhba->ctlr;
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+
+	switch (op) {
+	case ELS_FLOGI:
+	case ELS_FDISC:
+		return fc_elsct_send(lport, did, fp, op, mfc_flogi_resp,
+				     fip, timeout);
+	case ELS_LOGO:
+		/* only hook onto fabric logouts, not port logouts */
+		if (ntoh24(fh->fh_d_id) != FC_FID_FLOGI)
+			break;
+		return fc_elsct_send(lport, did, fp, op, mfc_logo_resp,
+				     lport, timeout);
+	}
+	return fc_elsct_send(lport, did, fp, op, resp, arg, timeout);
+}
+
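+/*
+ * Register the lport with the SCSI midlayer and initialize the libfc
+ * templates, the FIP controller (FCoE only) and an exchange manager for
+ * the given XID range.
+ */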
+static int mfc_libfc_init(struct fc_lport *lp, int min_xid, int max_xid,
+			  const char *symbolic_name, u64 wwpn, u64 wwnn)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+	int err;
+
+	fc_set_wwnn(lp, wwnn);
+	fc_set_wwpn(lp, wwpn);
+
+	/* libfc expects max FC frame size, including native FC header */
+	fc_set_mfs(lp, vhba->fc_payload_size + sizeof(struct fc_frame_header));
+
+	lp->host->max_lun = MFC_MAX_LUN;
+	lp->host->max_id = MFC_MAX_FCP_TARGET;
+	lp->host->max_channel = 0;
+	lp->host->transportt = mfc_transport_template;
+
+	err = scsi_add_host(lp->host, NULL);
+	if (err) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"Failed scsi_add_host port %d vhba %d\n",
+			vhba->mfc_port->port, vhba->idx);
+		return err;
+	}
+
+	snprintf(fc_host_symbolic_name(lp->host), FC_SYMBOLIC_NAME_SIZE,
+		 "%s v%s over %s", DRV_NAME, DRV_VERSION, symbolic_name);
+
+	if (vhba->net_type == NET_ETH) {
+		/* Initialize FIP */
+		fcoe_ctlr_init(&vhba->ctlr, FIP_MODE_AUTO);
+		vhba->ctlr.send = mlx4_fip_send;
+		vhba->ctlr.update_mac = mfc_update_src_mac;
+		vhba->ctlr.get_src_addr = mfc_get_src_addr;
+	}
+
+	lp->tt = mlx4_libfc_fcn_templ;
+
+	fc_exch_init(lp);
+	fc_elsct_init(lp);
+	fc_lport_init(lp);
+	fc_rport_init(lp);
+
+	if (vhba->net_type == NET_ETH) {
+		vhba->fc_rport_login = (void *)lp->tt.rport_login;
+		lp->tt.rport_login = (void *)mlx4_rport_login;
+	}
+
+	fc_disc_init(lp);
+
+	vhba->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3, min_xid, max_xid, NULL);
+	if (!vhba->emp) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"Failed to alloc libfc exch manager on port %d vhba %d\n",
+			vhba->mfc_port->port, vhba->idx);
+		return -ENOMEM;
+	}
+
+	if (vhba->net_type == NET_IB)
+		fc_fabric_login(lp);
+
+	return 0;
+}
+
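+/*
+ * Create a virtual HBA on a physical port: allocate an lport, a bulk of
+ * FEXCH QPs/MPTs, the control RFCI and the FCMD resources, then register
+ * with libfc and bring the link up.
+ */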
+int mfc_create_vhba(struct mfc_port *fc_port,
+		    unsigned int mtu,
+		    int vlan_id, int prio,
+		    int dest_lid, unsigned long dest_ctrl_qpn,
+		    unsigned long dest_data_qpn, int dest_sl,
+		    void *underdev, const char *symbolic_name,
+		    u64 gw_discovery_handle,
+		    fcoib_send_els_cb fcoib_send_els_cb,
+		    enum mfc_net_type net_type, u64 wwpn, u64 wwnn)
+{
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mlx4_caps *caps = &mfc_dev->dev->caps;
+	struct fc_lport *lp;
+	struct mfc_vhba *vhba;
+	int idx, port = fc_port->port;
+	int err;
+	unsigned long flags;
+	struct Scsi_Host *shost;
+
+	mfc_driver_template.can_queue = (1 << mfc_log_exch_per_vhba) -
+	    mfc_num_reserved_xids;
+
+	lp = libfc_host_alloc(&mfc_driver_template, sizeof(struct mfc_vhba));
+	if (!lp) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate lport on port %d\n", port);
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	shost = lp->host;
+	vhba = lport_priv(lp);
+	vhba->lp = lp;
+	vhba->gw_discovery_handle = gw_discovery_handle;
+	vhba->fcoib_send_els_cb = fcoib_send_els_cb;
+
+	err = mfc_lport_config(lp);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Error configuring lport on port %d\n", port);
+		goto err_host_put;
+	}
+
+	idx = mfc_bitmap_slot_alloc(&fc_port->fexch_bulk_bm, 1);
+	if (idx == -1) {
+		dev_err(mfc_dev->dma_dev,
+			"Failed alloc fexchs for new vhba on port %d\n", port);
+		err = -ENOMEM;
+		goto err_lport_destroy;
+	}
+	vhba->idx = idx;
+	vhba->mfc_port = fc_port;
+	vhba->underdev = underdev;
+	vhba->rfci[RFCI_DATA].fc_mac_idx = -1;
+	/* TODO: needed? */
+	vhba->rfci_rx_enabled = 0;
+
+	if (!mfc_t11_mode) {
+		vhba->fcoe_hlen = sizeof(struct fcoe_hdr_old);
+		vhba->fc_payload_size = mtu -
+		    sizeof(struct fcoe_hdr_old) -
+		    sizeof(struct fc_frame_header) -
+		    sizeof(struct fcoe_crc_eof_old);
+	} else {
+		vhba->fcoe_hlen = sizeof(struct fcoe_hdr);
+		vhba->fc_payload_size = mtu -
+		    sizeof(struct fcoe_hdr) -
+		    sizeof(struct fc_frame_header) -
+		    sizeof(struct fcoe_crc_eof);
+	}
+
+	if (net_type == NET_IB) {
+		vhba->fc_payload_size -= 2;
+		if (!mfc_t11_mode)
+			/* in IB pre-T11 we have 3 padding in EOF */
+			vhba->fc_payload_size -= 3;
+	}
+
+	/*
+	 * Round fc_payload_size down to a 16-byte boundary, which also
+	 * satisfies the 8-byte alignment needed to work around
+	 * Tachyon/Tachlite DIF insertion/marshalling.
+	 */
+	vhba->fc_payload_size = min(mfc_payload_size,
+				    vhba->fc_payload_size) & ~0xF;
+	vhba->num_fexch = 1 << fc_port->log_num_fexch_per_vhba;
+	vhba->base_fexch_qpn = fc_port->base_fexch_qpn + idx * vhba->num_fexch;
+	vhba->base_fexch_mpt = fc_port->base_fexch_mpt + idx * vhba->num_fexch;
+
+	dev_info(mfc_dev->dma_dev,
+		 "vhba %d type %s on port %d b_qpn=0x%x, b_mpt=0x%x, n_fexch=%d"
+		 " fc_payload_size=%d\n",
+		 vhba->idx, (net_type == NET_IB) ? "NET_IB" : "NET_ETH", port,
+		 vhba->base_fexch_qpn, vhba->base_fexch_mpt, vhba->num_fexch,
+		 vhba->fc_payload_size);
+
+	vhba->net_type = net_type;
+	vhba->dest_ib_lid = dest_lid;
+	vhba->dest_ib_ctrl_qpn = dest_ctrl_qpn;
+	vhba->dest_ib_data_qpn = dest_data_qpn;
+	vhba->dest_ib_sl = dest_sl;
+
+	vhba->fc_vlan_id = vlan_id;
+	vhba->fc_vlan_prio = prio;
+	if (vlan_id != -1) {
+		err = mlx4_register_vlan(mfc_dev->dev, port, vlan_id,
+					 &vhba->fc_vlan_idx);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"Fail to reg VLAN %d err=0x%x port%d vhba%d\n",
+				vlan_id, err, port, idx);
+			goto err_free_fexch_bulk;
+		}
+		dev_info(mfc_dev->dma_dev,
+			 "Reg vlan %d prio %d to index %d on port %d vhba %d\n",
+			 vlan_id, prio, vhba->fc_vlan_idx, port, idx);
+	}
+	u64_to_mac(vhba->rfci[RFCI_CTRL].mac, caps->def_mac[port]);
+
+	err = mfc_create_rfci(vhba, &vhba->rfci[RFCI_CTRL],
+			      caps->def_mac[port]);
+
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not create CTRL RFCI, err=%d\n",
+			port, idx, err);
+		goto err_unreg_vlan;
+	}
+
+	err = mfc_create_fcmd(vhba);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not create FCMD, err=%d\n",
+			port, idx, err);
+		goto err_destroy_rfci_ctrl;
+	}
+
+	err = mfc_libfc_init(lp, vhba->base_reserved_xid,
+			     vhba->base_reserved_xid + vhba->num_reserved_xid,
+			     symbolic_name, wwpn, wwnn);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not init libfc port %d vhba %d\n", port, idx);
+
+		goto err_destroy_fcmd;
+	}
+
+	err = mfc_init_rfci(vhba, &vhba->rfci[RFCI_CTRL]);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not init CTRL RFCI err=%x port %d vhba %d\n",
+			err, port, idx);
+		goto err_destroy_libfc;
+	}
+
+	memcpy(vhba->dest_addr, gw_mac, ETH_ALEN);
+	INIT_DELAYED_WORK(&vhba->delayed_work, mfc_link_work);
+
+	spin_lock_irqsave(&fc_port->lock, flags);
+	list_add(&vhba->list, &fc_port->vhba_list);
+	spin_unlock_irqrestore(&fc_port->lock, flags);
+
+	mfc_vhba_create_dentry(vhba);
+
+	if (net_type == NET_IB)
+		fc_linkup(lp);
+	else if (net_type == NET_ETH) {
+		mlx4_fip_ctrl_start(vhba);
+		fcoe_ctlr_link_up(&vhba->ctlr);
+		fc_fabric_login(lp);
+		vhba->link_up = 1;
+	}
+
+	return 0;
+
+err_destroy_libfc:
+	mfc_libfc_destroy(lp);
+err_destroy_fcmd:
+	mfc_destroy_fcmd(vhba);
+err_destroy_rfci_ctrl:
+	mfc_destroy_rfci(vhba, &vhba->rfci[RFCI_CTRL]);
+err_unreg_vlan:
+	if (vhba->fc_vlan_id != -1)
+		mlx4_unregister_vlan(mfc_dev->dev, port, vhba->fc_vlan_idx);
+err_free_fexch_bulk:
+	mfc_bitmap_slot_free(&fc_port->fexch_bulk_bm, idx);
+err_lport_destroy:
+	mfc_lport_destroy(lp);
+err_host_put:
+	scsi_host_put(lp->host);
+err_out:
+	return err;
+}
+
+/* takes vhba->mfc_port->lock internally; must not be called with it held */
+void mfc_remove_vhba(struct mfc_vhba *vhba)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	int port = fc_port->port, idx = vhba->idx;
+	struct fc_lport *lp = vhba->lp;
+	unsigned long flags;
+
+	vhba->need_reset = 1;
+	mfc_vhba_delete_dentry(vhba);
+
+	/* Logout of the fabric */
+	fc_fabric_logoff(lp);
+
+	if (vhba->net_type == NET_ETH)
+		mlx4_fip_ctrl_stop(vhba);
+
+	spin_lock_irqsave(&fc_port->lock, flags);
+	list_del(&vhba->list);
+	spin_unlock_irqrestore(&fc_port->lock, flags);
+
+	fc_linkdown(lp);
+
+	mfc_destroy_fcmd(vhba);
+
+	mfc_destroy_rfci(vhba, &vhba->rfci[RFCI_CTRL]);
+	if (vhba->rfci[RFCI_DATA].fc_mac_idx != -1)
+		mfc_destroy_rfci(vhba, &vhba->rfci[RFCI_DATA]);
+	if (vhba->fc_vlan_id != -1)
+		mlx4_unregister_vlan(mfc_dev->dev, port, vhba->fc_vlan_idx);
+	mfc_bitmap_slot_free(&fc_port->fexch_bulk_bm, idx);
+
+	mfc_libfc_destroy(vhba->lp);
+	mfc_lport_destroy(lp);
+	scsi_host_put(lp->host);
+}
+
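+/*
+ * Per-port init: reserve the FEXCH QP/MPT and RFCI QP ranges, push the
+ * basic FC configuration to the device and set up the per-port
+ * workqueues.
+ */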
+int mfc_init_port(struct mfc_dev *mfc_dev, int port)
+{
+	struct mfc_port *mfc_port = &mfc_dev->mfc_port[port];
+	int err = 0;
+	int mvp = (1 << mfc_dev->log_num_mac) * (1 << mfc_dev->log_num_vlan) *
+	    (1 << mfc_dev->log_num_prio);
+	struct mfc_basic_config_params params = { 0 };
+	int count = 0;
+	char wq_name[16];
+
+	memset(&mfc_port->npid_table, 0,
+	       sizeof(struct nport_id) * MFC_NUM_NPORT_IDS);
+	mfc_port->port = port;
+	mfc_port->mfc_dev = mfc_dev;
+	mfc_port->lock = __SPIN_LOCK_UNLOCKED(mfc_port->lock);
+	INIT_LIST_HEAD(&mfc_port->vhba_list);
+	mfc_port->num_fexch_qps =
+	    (1 << mfc_log_exch_per_vhba) * max_vhba_per_port;
+	mfc_port->log_num_fexch_per_vhba = mfc_log_exch_per_vhba;
+	err = mlx4_qp_reserve_range(mfc_dev->dev, mfc_port->num_fexch_qps,
+				    MFC_MAX_PORT_FEXCH,
+				    &mfc_port->base_fexch_qpn);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate QP range for FEXCH."
+			" Need 0x%x QPs aligned to 0x%x on port %d\n",
+			mfc_port->num_fexch_qps, MFC_MAX_PORT_FEXCH, port);
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	/* TODO: for bidirectional SCSI we'll need to double the amount of
+	   reserved MPTs, with proper spanning */
+	err = mlx4_mr_reserve_range(mfc_dev->dev, mfc_port->num_fexch_qps,
+				    2 * MFC_MAX_PORT_FEXCH,
+				    &mfc_port->base_fexch_mpt);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate MPT range for FEXCH."
+			" Need 0x%x MPTs aligned to 0x%x on port %d\n",
+			mfc_port->num_fexch_qps, 2 * MFC_MAX_PORT_FEXCH, port);
+		err = -ENOMEM;
+		goto err_free_qp_range;
+	}
+
+	switch (mfc_dev->dev->caps.port_type[port]) {
+	case MLX4_PORT_TYPE_IB:
+		count = max_vhba_per_port;
+		break;
+	case MLX4_PORT_TYPE_ETH:
+		count = mvp;
+		break;
+	default:
+		err = 1;
+		goto err_free_qp_range;
+	}
+
+	err = mlx4_qp_reserve_range(mfc_dev->dev, count, count,
+				    &mfc_port->base_rfci_qpn);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate QP range for RFCIs."
+			" Need 0x%x QPs naturally aligned on port %d\n",
+			count, port);
+		err = -ENOMEM;
+		goto err_free_mr_range;
+	}
+
+	params.rfci_base = mfc_port->base_rfci_qpn;
+	params.fexch_base = mfc_port->base_fexch_qpn;
+	params.fexch_base_mpt = mfc_port->base_fexch_mpt;
+	params.nm = mfc_port->n_m = mfc_dev->log_num_mac;
+	params.nv = mfc_port->n_v = mfc_dev->log_num_vlan;
+	params.np = mfc_port->n_p = mfc_dev->log_num_prio;
+	params.log_num_rfci = ilog2(count);
+	params.def_fcoe_promisc_qpn = 0x77;
+	params.def_fcoe_mcast_qpn = 0x78;
+
+	dev_info(mfc_dev->dma_dev,
+		 "port %d b_fexch=0x%x, n_fexch=0x%x, b_mpt=0x%x,"
+		 " b_rfci=0x%x, num_rfci=0x%x\n",
+		 port, mfc_port->base_fexch_qpn, mfc_port->num_fexch_qps,
+		 mfc_port->base_fexch_mpt, mfc_port->base_rfci_qpn, count);
+
+	err = mlx4_CONFIG_FC_BASIC(mfc_dev->dev, port, &params);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Failed issue CONFIG_FC Basic on port %d\n", port);
+		goto err_free_mr_range;
+	}
+
+	err = mfc_bitmap_alloc(&mfc_port->fexch_bulk_bm,
+			       mfc_port->num_fexch_qps >> mfc_port->
+			       log_num_fexch_per_vhba);
+
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Failed alloc fexch bulks bitmap on port %d\n", port);
+		goto err_free_mr_range;
+	}
+
+	snprintf(wq_name, 16, "rfci_wq_%d_%d", mfc_dev_idx, port);
+
+	mfc_port->rfci_wq = create_singlethread_workqueue(wq_name);
+	if (!mfc_port->rfci_wq) {
+		err = -ENOMEM;
+		goto err_free_bitmap;
+	}
+
+	snprintf(wq_name, 16, "async_wq_%d_%d", mfc_dev_idx, port);
+	mfc_port->async_wq = create_singlethread_workqueue(wq_name);
+	if (!mfc_port->async_wq) {
+		err = -ENOMEM;
+		goto err_free_wq;
+	}
+
+	mfc_port->initialized = 1;
+	mfc_port_create_dentry(mfc_port);
+
+	return 0;
+
+err_free_wq:
+	destroy_workqueue(mfc_port->rfci_wq);
+err_free_bitmap:
+	mfc_bitmap_free(&mfc_port->fexch_bulk_bm);
+err_free_mr_range:
+	mlx4_mr_release_range(mfc_dev->dev, mfc_port->base_fexch_mpt,
+			      mfc_port->num_fexch_qps);
+err_free_qp_range:
+	mlx4_qp_release_range(mfc_dev->dev, mfc_port->base_fexch_qpn,
+			      mfc_port->num_fexch_qps);
+err_out:
+	return err;
+}
+
+void mfc_free_port(struct mfc_dev *mfc_dev, int port)
+{
+	struct mfc_port *fc_port = &mfc_dev->mfc_port[port];
+	struct mfc_vhba *vhba, *tmp;
+
+	mfc_port_delete_dentry(fc_port);
+	fc_port->initialized = 0;
+
+	flush_workqueue(fc_port->rfci_wq);
+	flush_workqueue(fc_port->async_wq);
+
+	list_for_each_entry_safe(vhba, tmp, &fc_port->vhba_list, list)
+	    mfc_remove_vhba(vhba);
+
+	/*
+	 * make sure the bitmap is empty, meaning, no vhba's left using
+	 * fexch bulk
+	 */
+	mfc_bitmap_free(&fc_port->fexch_bulk_bm);
+	mlx4_qp_release_range(mfc_dev->dev, fc_port->base_fexch_qpn,
+			      fc_port->num_fexch_qps);
+	mlx4_mr_release_range(mfc_dev->dev, fc_port->base_fexch_mpt,
+			      fc_port->num_fexch_qps);
+
+	destroy_workqueue(fc_port->rfci_wq);
+	destroy_workqueue(fc_port->async_wq);
+}
+
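+/*
+ * mlx4 interface add callback: allocate per-HCA resources (PD, MR, UAR),
+ * pick the FC framing mode (T11 vs. pre-T11) and initialize every port.
+ */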
+static void *mfc_add_dev(struct mlx4_dev *dev)
+{
+	struct mfc_dev *mfc_dev;
+	int port;
+	int err;
+	unsigned long flags;
+	int pre_t11_enable = 0;
+	int t11_supported = 0;
+
+	dev_info(&dev->pdev->dev, "Adding device[%d] %.*s at %s\n",
+		 mfc_dev_idx + 1, MLX4_BOARD_ID_LEN, dev->board_id,
+		 dev_driver_string(&dev->pdev->dev));
+
+	mfc_dev = kzalloc(sizeof(struct mfc_dev), GFP_KERNEL);
+	if (!mfc_dev) {
+		dev_err(&dev->pdev->dev, "Alloc mfc_dev failed\n");
+		goto err_out;
+	}
+
+	mfc_dev->idx = mfc_dev_idx++;
+
+	err = mlx4_pd_alloc(dev, &mfc_dev->priv_pdn);
+	if (err) {
+		dev_err(&dev->pdev->dev, "PD alloc failed %d\n", err);
+		goto err_free_dev;
+	}
+
+	err = mlx4_mr_alloc(dev, mfc_dev->priv_pdn, 0, ~0ull,
+			    MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ, 0, 0,
+			    &mfc_dev->mr);
+	if (err) {
+		dev_err(&dev->pdev->dev, "mr alloc failed %d\n", err);
+		goto err_free_pd;
+	}
+
+	err = mlx4_mr_enable(dev, &mfc_dev->mr);
+	if (err) {
+		dev_err(&dev->pdev->dev, "mr enable failed %d\n", err);
+		goto err_free_mr;
+	}
+
+	if (mlx4_uar_alloc(dev, &mfc_dev->priv_uar))
+		goto err_free_mr;
+
+	mfc_dev->uar_map =
+	    ioremap(mfc_dev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!mfc_dev->uar_map)
+		goto err_free_uar;
+
+	MLX4_INIT_DOORBELL_LOCK(&mfc_dev->uar_lock);
+
+	INIT_LIST_HEAD(&mfc_dev->pgdir_list);
+	mutex_init(&mfc_dev->pgdir_mutex);
+
+	mfc_dev->dev = dev;
+	mfc_dev->dma_dev = &dev->pdev->dev;
+	mfc_dev->log_num_mac = dev->caps.log_num_macs;
+	mfc_dev->log_num_vlan = dev->caps.log_num_vlans;
+	mfc_dev->log_num_prio = dev->caps.log_num_prios;
+
+	mlx4_get_fc_t11_settings(dev, &pre_t11_enable, &t11_supported);
+
+	if (pre_t11_enable) {
+		mfc_t11_mode = 0;
+		dev_info(&dev->pdev->dev,
+			 "Starting FC device in PRE-T11 mode\n");
+	} else if (t11_supported) {
+		mfc_t11_mode = 1;
+		dev_info(mfc_dev->dma_dev, "Starting FC device in T11 mode\n");
+	} else {
+		dev_err(mfc_dev->dma_dev,
+			"Cannot start FC device in T11 mode, "
+			"please enable PRE-T11 in mlx4_core\n");
+		goto err_iounmap;
+	}
+
+	for (port = 1; port <= mfc_dev->dev->caps.num_ports; port++) {
+		err = mfc_init_port(mfc_dev, port);
+		if (err)
+			goto err_free_ports;
+	}
+
+	spin_lock_irqsave(&mfc_dev_list_lock, flags);
+	list_add(&mfc_dev->list, &mfc_dev_list);
+	spin_unlock_irqrestore(&mfc_dev_list_lock, flags);
+
+	return mfc_dev;
+
+err_free_ports:
+	while (--port)
+		mfc_free_port(mfc_dev, port);
+err_iounmap:
+	iounmap(mfc_dev->uar_map);
+err_free_uar:
+	mlx4_uar_free(dev, &mfc_dev->priv_uar);
+err_free_mr:
+	mlx4_mr_free(mfc_dev->dev, &mfc_dev->mr);
+err_free_pd:
+	mlx4_pd_free(dev, mfc_dev->priv_pdn);
+err_free_dev:
+	kfree(mfc_dev);
+err_out:
+	return NULL;
+}
+
+static void mfc_remove_dev(struct mlx4_dev *dev, void *fcdev_ptr)
+{
+	struct mfc_dev *mfc_dev = fcdev_ptr;
+	int port;
+	unsigned long flags;
+
+	dev_info(&dev->pdev->dev, "%.*s: removing\n", MLX4_BOARD_ID_LEN,
+		 dev->board_id);
+
+	spin_lock_irqsave(&mfc_dev_list_lock, flags);
+	list_del(&mfc_dev->list);
+	spin_unlock_irqrestore(&mfc_dev_list_lock, flags);
+
+	for (port = 1; port <= mfc_dev->dev->caps.num_ports; port++)
+		mfc_free_port(mfc_dev, port);
+
+	iounmap(mfc_dev->uar_map);
+	mlx4_uar_free(dev, &mfc_dev->priv_uar);
+	mlx4_mr_free(dev, &mfc_dev->mr);
+	mlx4_pd_free(dev, mfc_dev->priv_pdn);
+
+	kfree(mfc_dev);
+}
+
+static inline struct mfc_vhba *find_vhba_for_netdev(struct net_device *netdev)
+{
+	struct mfc_dev *mfc_dev;
+	struct mfc_port *fc_port;
+	struct mfc_vhba *vhba;
+	int p;
+	unsigned long flags2;
+
+	spin_lock_irqsave(&mfc_dev_list_lock, flags2);
+	list_for_each_entry(mfc_dev, &mfc_dev_list, list)
+	    for (p = 1; p <= MLX4_MAX_PORTS; ++p) {
+		unsigned long flags;
+		fc_port = &mfc_dev->mfc_port[p];
+		if (!fc_port->initialized)
+			continue;
+		spin_lock_irqsave(&fc_port->lock, flags);
+		list_for_each_entry(vhba, &fc_port->vhba_list, list)
+		    if (vhba->underdev == netdev) {
+			spin_unlock_irqrestore(&fc_port->lock, flags);
+			spin_unlock_irqrestore(&mfc_dev_list_lock, flags2);
+			return vhba;
+		}
+		spin_unlock_irqrestore(&fc_port->lock, flags);
+	}
+	spin_unlock_irqrestore(&mfc_dev_list_lock, flags2);
+	return NULL;
+}
+
+static void mfc_link_change(struct mfc_vhba *vhba, int link_up)
+{
+	struct fc_lport *lp = vhba->lp;
+
+	if (link_up) {
+		if (vhba->net_type == NET_ETH)
+			fcoe_ctlr_link_up(&vhba->ctlr);
+
+		fc_linkup(lp);
+	} else {
+		if (vhba->net_type == NET_ETH)
+			fcoe_ctlr_link_down(&vhba->ctlr);
+
+		fc_linkdown(lp);
+	}
+}
+
+static void mfc_link_work(struct work_struct *work)
+{
+	struct mfc_vhba *vhba =
+	    container_of(work, struct mfc_vhba, delayed_work.work);
+
+	if (!vhba->link_up)
+		vhba->need_reset = 1;
+	mfc_link_change(vhba, vhba->link_up);
+}
+
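+/*
+ * mlx4 async event callback: translate port up/down events into
+ * delayed link-change work for every vhba on the affected port.
+ */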
+static void mfc_async_event(struct mlx4_dev *dev, void *mfc_dev_ptr,
+			    enum mlx4_dev_event event, int port)
+{
+	struct mfc_dev *mfc_dev = (struct mfc_dev *)mfc_dev_ptr;
+	struct mfc_port *fc_port = &mfc_dev->mfc_port[port];
+	struct mfc_vhba *vhba, *tmp;
+	int link_up;
+
+	switch (event) {
+	case MLX4_DEV_EVENT_PORT_UP:
+		link_up = 1;
+		break;
+	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+	case MLX4_DEV_EVENT_PORT_DOWN:
+		link_up = 0;
+		break;
+	case MLX4_DEV_EVENT_PORT_REINIT:
+	default:
+		return;
+	}
+
+	list_for_each_entry_safe(vhba, tmp, &fc_port->vhba_list, list) {
+		if (vhba->link_up != link_up) {
+			vhba->link_up = link_up;
+
+			cancel_delayed_work(&vhba->delayed_work);
+			dev_warn(&dev->pdev->dev,
+				 "link %s on vhba %d port %d\n",
+				 (link_up ? "UP" : "DOWN"), vhba->idx, port);
+			queue_delayed_work(fc_port->async_wq,
+					   &vhba->delayed_work,
+					   MFC_ASYNC_DELAY);
+		}
+	}
+}
+
+static int mfc_register_netdev(struct net_device *netdev, int vlan_id, int prio)
+{
+	struct mfc_vhba *vhba;
+	struct mlx4_dev *dev;
+	int port;
+	struct mfc_dev *mfc_dev;
+	struct net_device *tmp_netdev, *query_netdev;
+	int err;
+	unsigned long flags;
+	u64 wwn, wwpn, wwnn;
+	int found = 0;
+
+	vhba = find_vhba_for_netdev(netdev);
+	if (vhba) {
+		dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+			 "warning: already got vhba for %s. skipping\n",
+			 netdev->name);
+		return 0;
+	}
+
+	tmp_netdev = (netdev->priv_flags & IFF_802_1Q_VLAN) ?
+		     vlan_dev_real_dev(netdev) : netdev;
+
+	spin_lock_irqsave(&mfc_dev_list_lock, flags);
+	list_for_each_entry(mfc_dev, &mfc_dev_list, list) {
+		dev = mfc_dev->dev;
+		for (port = 1; port <= dev->caps.num_ports; ++port) {
+			query_netdev = mlx4_get_prot_dev(dev, MLX4_PROT_EN,
+							 port);
+			if (query_netdev == tmp_netdev) {
+				found = 1;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	spin_unlock_irqrestore(&mfc_dev_list_lock, flags);
+
+	if (!found) {
+		printk(KERN_ERR PFX "%s does not belong to mlx4_en.\n",
+		       netdev->name);
+		err = -EINVAL;
+		goto out;
+	}
+
+	dev_info(&dev->pdev->dev,
+		 "%s belongs to mlx4_en. port=%d\n", netdev->name, port);
+
+	wwn = mfc_dev->dev->caps.def_mac[port];
+	wwnn = wwn | ((u64) 0x10 << 56);
+	wwpn = wwn | ((u64) 0x20 << 56);
+
+	err = mfc_create_vhba(&mfc_dev->mfc_port[port], netdev->mtu, vlan_id,
+			      prio, -1, 0, 0, 0, netdev, netdev->name,
+			      0, NULL, NET_ETH, wwpn, wwnn);
+	if (err)
+		dev_err(&dev->pdev->dev,
+			"Could not create vhba for net device %s vlan %d\n",
+			netdev->name, vlan_id);
+out:
+	return err;
+}
+
+static int mfc_unregister_netdev(struct net_device *netdev)
+{
+	struct mfc_vhba *vhba;
+
+	vhba = find_vhba_for_netdev(netdev);
+	if (!vhba) {
+		printk(KERN_ERR PFX "No vhba for %s. skipping.\n",
+		       netdev->name);
+		return 0;
+	}
+
+	mfc_remove_vhba(vhba);
+	return 0;
+}
+
+static struct mlx4_interface mfc_interface = {
+	.add = mfc_add_dev,
+	.remove = mfc_remove_dev,
+	.event = mfc_async_event
+};
+
+static void trimstr(char *str, int len)
+{
+	char *cp = str + len;
+	while (--cp >= str && *cp == '\n')
+		*cp = '\0';
+}
+
+static ssize_t mfc_sys_destroy(struct class *cl, struct class_attribute *attr,
+			       const char *buf, size_t count)
+{
+	char ifname[IFNAMSIZ];
+	struct net_device *netdev = NULL;
+
+	strncpy(ifname, buf, sizeof(ifname));
+	ifname[sizeof(ifname) - 1] = '\0';
+	trimstr(ifname, strlen(ifname));
+
+	netdev = dev_get_by_name(&init_net, ifname);
+	if (!netdev) {
+		printk(KERN_ERR "Couldn't get a network device for '%s'\n",
+		       ifname);
+		goto out;
+	}
+
+	mfc_unregister_netdev(netdev);
+
+out:
+	if (netdev)
+		dev_put(netdev);
+	return count;
+}
+
+static CLASS_ATTR(destroy, 0222, NULL, mfc_sys_destroy);
+
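+/*
+ * 'create' class attribute: expects "<ifname>[,<prio>]" and creates an
+ * FCoE vhba on top of the given mlx4_en net device (a VLAN device gives
+ * the vhba its VLAN id).
+ */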
+static ssize_t mfc_sys_create(struct class *cl, struct class_attribute *attr,
+			      const char *buf, size_t count)
+{
+	char ifname[IFNAMSIZ + 1];
+	char *ch;
+	char test;
+	int cnt = 0;
+	int vlan_id = -1;
+	int prio = 0;
+	struct net_device *netdev = NULL;
+
+	strncpy(ifname, buf, sizeof(ifname));
+	ifname[sizeof(ifname) - 1] = '\0';
+	trimstr(ifname, strlen(ifname));
+
+	ch = strchr(ifname, ',');
+	if (ch) {
+		*ch = '\0';
+		cnt = sscanf(ch + 1, "%d%c", &prio, &test);
+		if (cnt != 1 || prio < 0 || prio > 7)
+			prio = 0;
+	}
+
+	netdev = dev_get_by_name(&init_net, ifname);
+	if (!netdev) {
+		printk(KERN_ERR "Couldn't get a network device for '%s'\n",
+		       ifname);
+		goto out;
+	}
+	if (netdev->priv_flags & IFF_802_1Q_VLAN) {
+		vlan_id = vlan_dev_vlan_id(netdev);
+		printk(KERN_INFO PFX "vlan id %d prio %d\n", vlan_id, prio);
+		if (vlan_id < 0)
+			goto out;
+	}
+
+	mfc_register_netdev(netdev, vlan_id, prio);
+
+out:
+	if (netdev)
+		dev_put(netdev);
+	return count;
+}
+
+static CLASS_ATTR(create, 0222, NULL, mfc_sys_create);
+
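+/*
+ * 'create_ib' class attribute: expects a comma-separated list of hex
+ * values (vhba_idx, board_idx, port, mtu, lid, ctrl_qpn, data_qpn, sl)
+ * and creates an FCoIB vhba on the selected board/port.
+ */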
+static ssize_t mfc_sys_create_ib(struct class *cl, struct class_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct mfc_dev *mfc_dev;
+	struct mfc_port *fc_port;
+	int p, found = 0;
+	unsigned long flags2;
+	int idx, board_idx, port, lid, sl;
+	unsigned int mtu;
+	unsigned long ctrl_qpn, data_qpn;
+	char symbolic_name[] = "IB0P1";
+	u64 wwn, wwpn, wwnn;
+
+	if (8 != sscanf(buf, "%x,%x,%x,%x,%x,%lx,%lx,%x",
+			&idx, &board_idx, &port, &mtu, &lid, &ctrl_qpn,
+			&data_qpn, &sl)) {
+		printk(KERN_ERR
+		       "Bad string. All should be in hex without 0x: vhba_idx,"
+		       " board_idx, port ,mtu, lid, ctrl_qpn, data_qpn, sl\n");
+		return count;
+	}
+
+	p = 0;
+	spin_lock_irqsave(&mfc_dev_list_lock, flags2);
+	list_for_each_entry(mfc_dev, &mfc_dev_list, list) {
+		if (p == board_idx) {
+			found = 1;
+			break;
+		}
+		++p;
+	}
+	spin_unlock_irqrestore(&mfc_dev_list_lock, flags2);
+
+	if (!found) {
+		printk(KERN_ERR PFX "Only %d board(s) present\n", p);
+		return count;
+	}
+
+	if ((port < 1) || (port > MLX4_MAX_PORTS)) {
+		printk(KERN_ERR PFX "Port should be in range 1-%d\n",
+		       MLX4_MAX_PORTS);
+		return count;
+	}
+
+	fc_port = &mfc_dev->mfc_port[port];
+	if (!fc_port->initialized) {
+		printk(KERN_ERR PFX "Port is not yet initialized for FCoIB\n");
+		return count;
+	}
+
+	wwn = mfc_dev->dev->caps.def_mac[fc_port->port];
+	wwnn = wwn | ((u64) 0x10 << 56);
+	wwpn = wwn | ((u64) (idx & 0xff) << 48) | ((u64) 0x20 << 56);
+
+	snprintf(symbolic_name, sizeof(symbolic_name), "IB%1dP%1d",
+		 board_idx, port);
+
+	mfc_create_vhba(fc_port, mtu, -1, -1, lid, ctrl_qpn, data_qpn, sl,
+			NULL, symbolic_name, 0, NULL, NET_IB, wwpn, wwnn);
+
+	return count;
+}
+
+static CLASS_ATTR(create_ib, 0222, NULL, mfc_sys_create_ib);
+
+int fcoib_create_vhba(struct ib_device *ib_device,
+		      u8 port, unsigned int mtu, u16 gw_lid, u8 sl,
+		      u64 gw_discovery_handle,
+		      fcoib_send_els_cb fcoib_send_els_cb, u64 wwpn, u64 wwnn)
+{
+	struct mfc_dev *mfc_dev;
+	struct mfc_port *fc_port;
+	struct mlx4_dev *mlxdev;
+	struct ib_device *ib_device_itr;
+	char symbolic_name[] = "IB0P1";
+	unsigned long flags;
+	int found;
+	int err = 0;
+
+	/* port number can be 1 or 2 */
+	if ((port < 1) || (port > MLX4_MAX_PORTS)) {
+		printk(KERN_ALERT "Port should be in range 1-%d\n",
+		       MLX4_MAX_PORTS);
+		return -1;
+	}
+
+	/* find the corresponding FC device from the IB device */
+	found = 0;
+	spin_lock_irqsave(&mfc_dev_list_lock, flags);
+	list_for_each_entry(mfc_dev, &mfc_dev_list, list) {
+		mlxdev = mfc_dev->dev;
+		ib_device_itr = mlx4_get_prot_dev(mlxdev, MLX4_PROT_IB, port);
+		if (ib_device == ib_device_itr) {
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&mfc_dev_list_lock, flags);
+
+	if (!found) {
+		printk(KERN_ALERT "mlx4_fc: couldn't find matching "
+		       "fc_dev for ib_dev\n");
+		return -1;
+	}
+
+	fc_port = &mfc_dev->mfc_port[port];
+	if (!fc_port || !fc_port->initialized) {
+		printk(KERN_ALERT "Port is not yet initialized for FCoIB\n");
+		return -1;
+	}
+
+	/* TODO: check how symbolic name should be built */
+	snprintf(symbolic_name, sizeof(symbolic_name),
+		 "IB%1dP%1d", 0, port);
+
+	err = mfc_create_vhba(fc_port, mtu, -1, -1, gw_lid, CTRL_QPN,
+			      DATA_QPN, sl, NULL, symbolic_name,
+			      gw_discovery_handle, fcoib_send_els_cb, NET_IB,
+			      wwpn, wwnn);
+	if (err) {
+		printk(KERN_ALERT "FAIL: create vhba\n");
+		return err;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(fcoib_create_vhba);
+
+void fcoib_destroy_vhba(u64 gw_fc_handle)
+{
+	struct mfc_vhba *vhba = (struct mfc_vhba *)gw_fc_handle;
+
+	if (!vhba->mfc_port->initialized)
+		return;
+
+	mfc_remove_vhba(vhba);
+}
+EXPORT_SYMBOL(fcoib_destroy_vhba);
+
+void fcoib_get_vhba_fcid(u64 gw_fc_handle, uint8_t *fcid)
+{
+	struct mfc_vhba *vhba = (struct mfc_vhba *)gw_fc_handle;
+
+	memcpy(fcid, vhba->my_npid.fid, 3);
+}
+EXPORT_SYMBOL(fcoib_get_vhba_fcid);
+
+static ssize_t mfc_sys_link_change(struct class *cl, const char *buf,
+				   size_t count, int link_up)
+{
+	struct mfc_vhba *vhba;
+	struct net_device *netdev = NULL;
+	char ifname[IFNAMSIZ];
+
+	strncpy(ifname, buf, sizeof(ifname));
+	ifname[sizeof(ifname) - 1] = '\0';
+	trimstr(ifname, strlen(ifname));
+
+	netdev = dev_get_by_name(&init_net, ifname);
+	if (!netdev) {
+		printk(KERN_ERR PFX "Couldn't get a network device for '%s'\n",
+		       ifname);
+		goto out;
+	}
+
+	vhba = find_vhba_for_netdev(netdev);
+	if (!vhba) {
+		printk(KERN_ERR PFX "vhba for '%s' doesn't exist - ignoring\n",
+		       ifname);
+		goto out;
+	}
+
+	mfc_link_change(vhba, link_up);
+
+out:
+	if (netdev)
+		dev_put(netdev);
+	return count;
+}
+
+static ssize_t mfc_sys_link_up(struct class *cl, struct class_attribute *attr,
+			       const char *buf, size_t count)
+{
+	return mfc_sys_link_change(cl, buf, count, 1);
+}
+
+static CLASS_ATTR(link_up, 0222, NULL, mfc_sys_link_up);
+
+static ssize_t mfc_sys_link_down(struct class *cl, struct class_attribute *attr,
+				 const char *buf, size_t count)
+{
+	return mfc_sys_link_change(cl, buf, count, 0);
+}
+
+static CLASS_ATTR(link_down, 0222, NULL, mfc_sys_link_down);
+
+struct class *mfc_class;
+
+struct class_attribute *class_attrs[] = {
+	&class_attr_link_up,
+	&class_attr_link_down,
+	&class_attr_create,
+	&class_attr_create_ib,
+	&class_attr_destroy,
+	NULL
+};
+
+int mfc_reset(struct Scsi_Host *shost)
+{
+	struct fc_lport *lp = shost_priv(shost);
+	struct mfc_vhba *vhba = lport_priv(lp);
+	int err = 0;
+
+	dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+		 "Reset port%d vhba%d\n", vhba->mfc_port->port, vhba->idx);
+
+	vhba->need_reset = 1;
+	err = mfc_lld_reset(lp);
+	if (err)
+		goto out;
+
+	fc_lport_reset(lp);
+
+out:
+	return err;
+}
+
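+/*
+ * Low-level reset: tear down the data RFCI (re-created on FLOGI accept),
+ * recycle the control RFCI and reset the FCMD QP so the vhba can log in
+ * again from a clean state.
+ */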
+static int mfc_lld_reset(struct fc_lport *lp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+	struct mfc_dev *mfc_dev = vhba->mfc_port->mfc_dev;
+	struct mlx4_caps *caps = &mfc_dev->dev->caps;
+	int port = vhba->mfc_port->port;
+	int err = 0;
+
+	if (!vhba->need_reset)
+		return -EINVAL;
+
+	dev_info(mfc_dev->dma_dev,
+		 "lld reset on port%d vhba%d link_up=%d\n",
+		 port, vhba->idx, vhba->link_up);
+
+	/* destroy data rfci - will be created on flogi accept */
+	if ((vhba->net_type == NET_ETH && !mfc_debug_mode) &&
+	    (vhba->rfci[RFCI_DATA].fc_mac_idx != -1)) {
+		vhba->rfci[RFCI_DATA].fc_qp.is_flushing = 1;
+		err = mfc_destroy_rfci(vhba, &vhba->rfci[RFCI_DATA]);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"Failed to destroy DATA RFCI port%d vhba%d"
+				" err=%d\n", port, vhba->idx, err);
+			goto out;
+		}
+	}
+
+	vhba->rfci[RFCI_CTRL].fc_qp.is_flushing = 1;
+
+	/* destroy, re-create and init the ctrl rfci */
+	err = mfc_destroy_rfci(vhba, &vhba->rfci[RFCI_CTRL]);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Failed to destroy CTRL RFCI on port%d vhba%d err=%d\n",
+			port, vhba->idx, err);
+		goto out;
+	}
+
+	err = mfc_create_rfci(vhba, &vhba->rfci[RFCI_CTRL],
+			      caps->def_mac[port]);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not create CTRL RFCI, err=%d\n",
+			port, vhba->idx, err);
+		goto out;
+	}
+
+	err = mfc_init_rfci(vhba, &vhba->rfci[RFCI_CTRL]);
+	if (err) {
+		mfc_destroy_rfci(vhba, &vhba->rfci[RFCI_CTRL]);
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not init CTRL RFCI, err=%d\n",
+			port, vhba->idx, err);
+		goto out;
+	}
+
+	vhba->rfci[RFCI_CTRL].fc_qp.is_flushing = 0;
+
+	/* destroy and create fcmd - will be init on flogi accept */
+	if (vhba->fcmd.fc_qp.is_created)
+		if (mfc_reset_fcmd(vhba))
+			dev_err(mfc_dev->dma_dev, "reset_fcmd failed\n");
+	vhba->flogi_finished = 0;
+	vhba->need_reset = 0;
+
+out:
+	return err;
+}
+
+/*
+ * Scsi handler for fexch abort.
+ * After calling this function scsi will destroy the cmd.
+ * So if there is our abort running it will fail.
+ */
+static int mfc_abort(struct scsi_cmnd *cmd)
+{
+	struct mfc_exch *fexch;
+	struct mfc_vhba *vhba;
+	struct fc_rport *rp;
+	struct fc_lport *lp;
+	int rc = FAILED;
+
+	lp = shost_priv(cmd->device->host);
+	if (!lp || lp->state != LPORT_ST_READY || !lp->link_up)
+		goto out;
+
+	fexch = (struct mfc_exch *)cmd->SCp.ptr;
+	if (!fexch)
+		goto out;
+
+	vhba = fexch->vhba;
+	if (!vhba || !vhba->link_up)
+		goto out;
+
+	spin_lock_irq(lp->host->host_lock);
+
+	rp = starget_to_rport(scsi_target(fexch->scmd->device));
+	if (fc_remote_port_chkready(rp)) {
+		spin_unlock_irq(lp->host->host_lock);
+		goto out;
+	}
+
+	init_completion(&fexch->tm_done);
+
+	fexch->state = FEXCH_SEND_ABORT;
+
+	spin_unlock_irq(lp->host->host_lock);
+
+	/* Send ABTS for current fexch */
+	if (mfc_send_abort_tsk(fexch, rp->port_id))
+		goto out;
+
+	rc = wait_for_completion_timeout(&fexch->tm_done, MFC_CMD_TIMEOUT);
+
+	if (!rc) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"target abort cmd timeout\n");
+		rc = FAILED;
+	} else if (fexch->state == FEXCH_ABORT) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"target abort cmd done\n");
+		cmd->result = DID_ABORT << 16;
+		rc = SUCCESS;
+		mfc_reset_fexch(vhba, fexch);
+	} else if (fexch->state == FEXCH_CMD_DONE) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev, "target cmd done\n");
+		rc = SUCCESS;
+		mfc_reset_fexch(vhba, fexch);
+		cmd->scsi_done(cmd);
+	} else
+		rc = FAILED;
+out:
+	return rc;
+}
+
+static int mfc_device_reset(struct scsi_cmnd *cmd)
+{
+	struct mfc_exch *fexch;
+	struct mfc_vhba *vhba;
+
+	fexch = (struct mfc_exch *)cmd->SCp.ptr;
+	if (!fexch)
+		return FAILED;
+
+	vhba = fexch->vhba;
+	if (!vhba || !vhba->link_up)
+		return FAILED;
+
+	dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+		 "device reset function called\n");
+
+	return FAILED;
+}
+
+static int mfc_host_reset(struct scsi_cmnd *cmd)
+{
+	return (mfc_reset(cmd->device->host)) ? FAILED : SUCCESS;
+}
+
+struct fc_function_template mfc_transport_function = {
+	.show_host_node_name = 1,
+	.show_host_port_name = 1,
+	.show_host_supported_classes = 1,
+	.show_host_supported_fc4s = 1,
+	.show_host_active_fc4s = 1,
+	.show_host_maxframe_size = 1,
+
+	.show_host_port_id = 1,
+	.show_host_supported_speeds = 1,
+	.get_host_speed = fc_get_host_speed,
+	.show_host_speed = 1,
+	.show_host_port_type = 1,
+	.get_host_port_state = fc_get_host_port_state,
+	.show_host_port_state = 1,
+	.show_host_symbolic_name = 1,
+
+	.dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
+	.show_rport_maxframe_size = 1,
+	.show_rport_supported_classes = 1,
+
+	.show_host_fabric_name = 1,
+	.show_starget_node_name = 1,
+	.show_starget_port_name = 1,
+	.show_starget_port_id = 1,
+	.set_rport_dev_loss_tmo = fc_set_rport_loss_tmo,
+	.show_rport_dev_loss_tmo = 1,
+	.get_fc_host_stats = fc_get_host_stats,
+	.issue_fc_host_lip = mfc_reset,
+	.terminate_rport_io = fc_rport_terminate_io,
+};
+
+static int __init mfc_init(void)
+{
+	int err = 0;
+	int i;
+
+	if (mfc_debug_mode) {
+		int r;
+		r = sscanf(gateway_mac,
+			   "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx",
+			   &gw_mac[0], &gw_mac[1], &gw_mac[2], &gw_mac[3],
+			   &gw_mac[4], &gw_mac[5]);
+		if (r != 6) {
+			printk(KERN_ERR "Bad gw_mac: %s. r=%d\n", gateway_mac,
+			       r);
+			return -1;
+		}
+	}
+
+	mfc_transport_template = fc_attach_transport(&mfc_transport_function);
+	if (mfc_transport_template == NULL) {
+		printk(KERN_ERR PFX "Failed to attach FC transport\n");
+		return -1;
+	}
+
+	err = mlx4_register_interface(&mfc_interface);
+	if (err)
+		goto err_release_transport;
+
+	mfc_class = class_create(THIS_MODULE, "mlx4_fc");
+	if (IS_ERR(mfc_class)) {
+		err = PTR_ERR(mfc_class);
+		goto err_unreg;
+	}
+
+	for (i = 0; class_attrs[i]; i++) {
+		err = class_create_file(mfc_class, class_attrs[i]);
+		if (err) {
+			class_attrs[i] = NULL;
+			goto err_remove_files;
+		}
+	}
+
+	return 0;
+
+err_remove_files:
+	for (i = 0; class_attrs[i]; i++)
+		class_remove_file(mfc_class, class_attrs[i]);
+	class_destroy(mfc_class);
+err_unreg:
+	mlx4_unregister_interface(&mfc_interface);
+err_release_transport:
+	fc_release_transport(mfc_transport_template);
+
+	return err;
+}
+
+static void __exit mfc_cleanup(void)
+{
+	int i;
+
+	for (i = 0; class_attrs[i]; i++)
+		class_remove_file(mfc_class, class_attrs[i]);
+
+	class_destroy(mfc_class);
+	mlx4_unregister_interface(&mfc_interface);
+	fc_release_transport(mfc_transport_template);
+}
+
+module_init(mfc_init);
+module_exit(mfc_cleanup);
diff --git a/drivers/scsi/mlx4_fc/mfc.h b/drivers/scsi/mlx4_fc/mfc.h
new file mode 100644
index 0000000..6e7bd4c
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/mfc.h
@@ -0,0 +1,666 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MFC_H
+#define MFC_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <linux/interrupt.h>
+#include <linux/kobject.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/srq.h>
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/netdevice.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/libfc.h>
+#include <scsi/libfcoe.h>
+#include <scsi/fc_frame.h>
+#include <scsi/fc/fc_fcp.h>
+#include <scsi/fc/fc_fcoe.h>
+
+#include "fcoib_api.h"
+
+#define MFC_CMD_TIMEOUT			(5 * HZ)
+#define MFC_MAX_LUN			255
+#define MFC_MAX_FCP_TARGET		256
+#define MFC_MAX_CMD_PER_LUN		16
+#define MFC_BIT_DESC_OWN		0x80000000
+#define MFC_RFCI_OP_SEND		0xa
+#define MFC_CMD_OP_SEND			0xd
+#define MFC_BIT_INS_VLAN		0x4000
+#define MFC_BIT_NO_ICRC			0x2
+#define MFC_BIT_TX_COMP			0xc
+#define MFC_BIT_TX_IP_CS		0x10
+#define MFC_BIT_TX_TCP_CS		0x20
+#define MFC_BIT_TX_FCRC_CS		0x40
+#define MFC_CQ_ARM_CMD			0x2
+#define MFC_CMD_CQ_ENTRIES		128
+#define MFC_RFCI_CQ_ENTRIES		128
+#define MFC_NUM_NPORT_IDS		128
+#define MFC_MAX_PORT_FEXCH		(64 * 1024)
+#define MFC_MAX_FMR_PAGES		512
+#define MFC_FMR_PAGE_SHIFT		9
+#define MFC_RFCI_RX_SKB_BUFSIZE		(PAGE_SIZE - 1024)
+#define MFC_CMD_RX_SKB_BUFSIZE		(PAGE_SIZE - 1024)
+#define MFC_ALLOC_ORDER			2
+#define MFC_ALLOC_SIZE			(PAGE_SIZE << MFC_ALLOC_ORDER)
+#define MFC_GW_ADDR_MODE		0x00
+#define MFC_FCOUI_ADDR_MODE		0x01
+#define MFC_ASYNC_DELAY			(HZ / 4)
+
+#define MLX4_CMD_CONFIG_FC		0x4a
+#define MLX4_CMD_SET_VLAN_FLTR		0x47
+#define MLX4_CMD_MOD_FC_ENABLE		0
+#define MLX4_CMD_MOD_FC_DISABLE		1
+#define MLX4_CMD_INMOD_BASIC_CONF	0x0000
+#define MLX4_CMD_INMOD_NPORT_TAB	0x0100
+#define MLX4_LINK_TYPE_IB		0
+#define MLX4_LINK_TYPE_ETH		1
+#define MLX4_MPT_ENABLE_INVALIDATE	(0x3 << 24)
+#define MLX4_FCOIB_QKEY			0x80020005
+#define MLX4_DEFAULT_FC_MTU		2112
+#define MLX4_DEFAULT_NUM_RESERVED_XIDS	256
+#define MLX4_DEFAULT_LOG_EXCH_PER_VHBA	10
+#define MLX4_DEFAULT_MAX_VHBA_PER_PORT			\
+	(1 << (16 - MLX4_DEFAULT_LOG_EXCH_PER_VHBA))
+
+/* aligned to cacheline (wqe bug), enough for 1 ctl + 1 dgram + 1 ds */
+#define RFCI_SQ_BB_SIZE			128
+#define RFCI_RQ_WQE_SIZE		sizeof(struct mfc_data_seg)
+
+/* 1 ctl + 1 IB addr + 1 fcp init + 1 ds = 96*/
+#define FCMD_SQ_BB_SIZE			128
+#define FCMD_RQ_NUM_WQES		1	/* minimum allowed 2^0 */
+#define FCMD_RQ_WQE_SIZE		16	/* minimum allowed 2^0 * 16 */
+#define FEXCH_SQ_NUM_BBS		1	/* minimum allowed 2^0 */
+#define FEXCH_SQ_BB_SIZE		16	/* minimum allowed 2^0 * 16 */
+#define FEXCH_RQ_WQE_SIZE		16	/* 1 ds */
+#define FEXCH_RQ_NUM_WQES		32
+#define VLAN_FLTR_SIZE			128
+#define VHBA_SYSFS_LEN			32
+#define FC_MAX_ERROR_CNT		5
+#define QPC_SERVICE_TYPE_RFCI		9
+#define QPC_SERVICE_TYPE_FCMD		4
+#define QPC_SERVICE_TYPE_FEXCH		5
+#define ETH_P_FIP			0x8914
+#define FCOIB_SIG			0x4000
+#define QUERY_PORT_LINK_MASK		0x80
+#define SQ_NO_PREFETCH			(1 << 7)
+#define DATA_QPN			0
+#define CTRL_QPN			0
+
+#define FCOE_WORD_TO_BYTE		4
+#define	FCOE_ENCAPS_LEN_SOF(len, sof)	((FC_FCOE_VER << 14) |	\
+					 (((len) & 0x3ff) << 4) | ((sof) & 0xf))
+#define	FCOE_DECAPS_LEN(n)		(((n) >> 4) & 0x3ff)
+#define	FCOE_DECAPS_SOF(n)		(((n) & 0x8) ? (((n) &	\
+					 0xf) + 0x20) : (((n) & 0xf) + 0x30))
+
+#define XNOR(x, y)			(!(x) == !(y))
+
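+/*
+ * Store a value into a command mailbox at the given byte offset,
+ * converting to big-endian according to the size of the source.
+ */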
+#define MLX4_PUT(dest, source, offset)				\
+do {								\
+	void *__d = ((char *) (dest) + (offset));		\
+	switch (sizeof(source)) {				\
+	case 1:							\
+		*(u8 *) __d = (source);				\
+		break;						\
+	case 2:							\
+		*(__be16 *) __d = cpu_to_be16(source);		\
+		break;						\
+	case 4:							\
+		*(__be32 *) __d = cpu_to_be32(source);		\
+		break;						\
+	case 8:							\
+		*(__be64 *) __d = cpu_to_be64(source);		\
+		break;						\
+	default:						\
+		BUG();						\
+	}							\
+} while (0)
+
+#define OFFSET_IN_PAGE(v)	((u64)(v) & (PAGE_SIZE - 1))
+#define SHIFT_TO_SIZE(x)	(1 << (x))
+#define SHIFT_TO_MASK(x)	(~((u64) SHIFT_TO_SIZE(x) - 1))
+
+#define MAC_PRINTF_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define MAC_PRINTF_VAR(m) m[0], m[1], m[2], m[3], m[4], m[5]
+
+#define mfc_q_info_get(q, index, type)				\
+	(*((type *)((q)->info + ((index) * sizeof(type)))))
+
+#define mlx4_from_ctlr(fc) container_of(fc, struct mfc_vhba, ctlr)
+
+struct mfc_vhba;
+
+struct fcoe_hdr_old {
+	__be16 fcoe_plen;	/* fc frame len and SOF */
+};
+
+struct fcoe_crc_eof_old {
+	__be32 fcoe_crc32;	/* CRC for FC packet */
+	u8 fcoe_eof;		/* EOF */
+} __attribute__ ((packed));
+
+enum mfc_cmd_io_dir {
+	FCMD_IO_DIR_TARGET = 0,
+	FCMD_IO_DIR_READ,
+	FCMD_IO_DIR_WRITE,
+	FCMD_IO_DIR_BIDI,
+};
+
+enum mfc_rfci_type {
+	RFCI_CTRL = 0,
+	RFCI_DATA,
+	RFCI_NUM		/* must be last */
+};
+
+struct mfc_basic_config_params {
+	u32 fexch_base;
+	u8 nm, nv, np;
+	u32 fexch_base_mpt;
+	u8 log_num_rfci;
+	u32 rfci_base;
+	u32 def_fcoe_promisc_qpn;
+	u32 def_fcoe_mcast_qpn;
+};
+
+struct mfc_query_port_context {
+	u8 supported_port_type;
+	u8 actual_port_type;
+	__be16 mtu;
+	u32 reserved2[3];
+	__be64 mac;
+};
+
+struct mfc_set_vlan_fltr_mbox {
+	__be32 entry[VLAN_FLTR_SIZE];
+};
+
+struct mfc_exch_cqe {
+	__be32 my_qpn;
+	__be32 invalidate_key;
+	__be32 seq_id_rqpn_srq;
+	__be32 xmit_byte_count;
+	__be32 rcv_byte_count;
+	__be32 byte_cnt;
+	__be16 wqe_index;
+	__be16 seq_count;
+	u8 reserved[3];
+	u8 owner_sr_opcode;
+};
+
+enum mfc_en_link_state {
+	LINK_DOWN,
+	LINK_UP
+};
+
+enum mfc_net_type {
+	NET_IB = 1,
+	NET_ETH = 2,
+};
+
+struct mfc_bitmap {
+	unsigned long *addr;
+	unsigned size;
+	unsigned long last_bit;
+};
+
+typedef void (*comp_fn) (struct mfc_vhba *, struct mlx4_cqe *);
+
+struct mfc_cq {
+	struct mlx4_cq mcq;
+	struct mlx4_hwq_resources wqres;
+	int size;
+	int buf_size;
+	struct mfc_cqe *buf;
+	int size_mask;
+	char name[10];
+	struct mfc_vhba *vhba;
+	comp_fn comp_rx;
+	comp_fn comp_tx;
+};
+
+struct mfc_queue {
+	u32 size;
+	u32 size_mask;
+	u16 stride;
+	u32 prod;
+	u32 cons;
+	void *buf;
+	spinlock_t lock;
+	void *info;
+};
+
+struct mfc_qp {
+	struct mlx4_qp mqp;
+	u32 buf_size;
+	struct mlx4_hwq_resources wqres;
+	struct mfc_queue sq;
+	struct mfc_queue rq;
+	u32 doorbell_qpn;
+	int is_created;
+	int is_flushing;
+};
+
+struct mfc_rfci {
+	struct mfc_qp fc_qp;
+	struct mfc_cq fc_cq;
+	u8 mac[ETH_ALEN];
+	int fc_mac_idx;
+};
+
+struct mfc_cmd {
+	struct mfc_qp fc_qp;
+	struct mfc_cq fc_cq;
+};
+
+enum mfc_exch_state {
+	FEXCH_OK = 1,
+	FEXCH_CMD_DONE,
+	FEXCH_SEND_ABORT,
+	FEXCH_ABORT
+};
+
+struct mfc_exch {
+	struct mfc_vhba *vhba;
+	struct mfc_qp fc_qp;
+	struct mlx4_fmr fmr;
+	char *bouncebuff;
+	int bounce_off;
+	struct scatterlist bounce_sg[1];
+	int tx_completed;
+	int mtu;
+	int fcmd_wqe_idx;
+	u8 *response_buf;
+	struct scsi_cmnd *scmd;
+	struct completion tm_done;
+	enum mfc_exch_state state;
+};
+
+struct mfc_sysfs_attr {
+	void *ctx;
+	struct kobject *kobj;
+	unsigned long data;
+	char name[VHBA_SYSFS_LEN];
+	struct module_attribute mattr;
+	struct device *dev;
+};
+
+struct nport_id {
+	u8 reserved;
+	u8 fid[3];
+};
+
+/* represents a virtual HBA on a port */
+struct mfc_vhba {
+	struct list_head list;
+	struct fc_lport *lp;
+	struct mfc_port *mfc_port;
+	void *underdev;
+	int idx;
+	int fc_vlan_id;
+	int fc_vlan_idx;
+	int fc_vlan_prio;
+	struct mfc_rfci rfci[RFCI_NUM];
+	struct mfc_cmd fcmd;
+	struct mfc_exch *fexch;
+	struct mfc_bitmap fexch_bm;
+	int num_fexch;
+	struct mfc_cq fexch_cq[NR_CPUS];
+	int base_fexch_qpn;
+	int base_fexch_mpt;
+	int base_reserved_xid;
+	int num_reserved_xid;
+	enum mfc_net_type net_type;
+	u8 dest_addr[ETH_ALEN];
+	int dest_ib_lid;
+	unsigned long dest_ib_ctrl_qpn;
+	unsigned long dest_ib_data_qpn;
+	int dest_ib_sl;
+	int flogi_finished;
+	int link_up;
+	struct nport_id my_npid;
+	int fc_payload_size;
+	u16 flogi_oxid;
+	u8 flogi_progress;
+	u8 fcoe_hlen;
+	u8 rfci_rx_enabled;
+	u8 need_reset;
+	struct delayed_work delayed_work;
+
+	/* Saved libfc rport_login callback */
+	int (*fc_rport_login) (struct fc_rport_priv *rdata);
+
+	/* sysfs stuff */
+	struct mfc_sysfs_attr dentry;
+
+	/*handle & callback for FCoIB discovery */
+	u64 gw_discovery_handle;
+	fcoib_send_els_cb fcoib_send_els_cb;
+
+	/* fip stuff */
+	struct packet_type fip_packet_type;
+	struct fcoe_ctlr ctlr;
+
+	struct fc_exch_mgr *emp;
+};
+
+/* represents a physical port on HCA */
+struct mfc_port {
+	struct mfc_dev *mfc_dev;
+	u8 port;
+	u8 n_m;
+	u8 n_v;
+	u8 n_p;
+	int base_rfci_qpn;
+	int base_fexch_qpn;
+	int base_fexch_mpt;
+	int num_fexch_qps;
+	int log_num_fexch_per_vhba;
+	int initialized;
+	struct mfc_bitmap fexch_bulk_bm;
+	struct list_head vhba_list;
+	spinlock_t lock;
+	struct mfc_sysfs_attr dentry;
+	struct nport_id npid_table[MFC_NUM_NPORT_IDS];
+	struct workqueue_struct *rfci_wq;
+	struct workqueue_struct *async_wq;
+};
+
+/* represents a single HCA */
+struct mfc_dev {
+	struct list_head list;
+	struct mlx4_dev *dev;
+	struct mfc_port mfc_port[MLX4_MAX_PORTS + 1];
+	int base_rfci_qpn;
+	int num_rfci_qps;
+	int log_num_mac;
+	int log_num_vlan;
+	int log_num_prio;
+	struct list_head pgdir_list;
+	struct mutex pgdir_mutex;
+	void __iomem *uar_map;
+	struct mlx4_uar priv_uar;
+	u32 priv_pdn;
+	struct mlx4_mr mr;
+	struct device *dma_dev;
+	int idx;
+	MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
+};
+
+struct mfc_rfci_rx_info {
+	struct mfc_vhba *vhba;
+	struct sk_buff *skb;
+	struct work_struct work;
+};
+
+struct mfc_flogi_finished_info {
+	struct work_struct work;
+	struct sk_buff *skb;
+	u8 eof;
+	struct fc_lport *lp;
+};
+
+struct mfc_ctrl_seg {
+	__be32 op_own;
+	__be16 vlan;
+	__be16 size;
+	__be32 flags;
+	__be32 parameter;
+};
+
+struct mfc_datagram_seg {
+	__be32 fl_portn_pd;
+	u8 reserved1;
+	u8 mlid_grh;
+	__be16 rlid;
+	u8 reserved2;
+	u8 mgid_idx;
+	u8 stat_rate;
+	u8 hop_limit;
+	__be32 sl_tclass_flabel;
+	__be32 rgid[4];
+	__be32 dqpn;
+	__be32 qkey;
+	__be32 reserved3[2];
+};				/* size 12 dwords */
+
+struct mfc_data_seg {
+	__be32 count;
+	__be32 mem_type;
+	__be64 addr;
+};
+
+struct mfcoe_rfci_tx_desc {
+	struct mfc_ctrl_seg ctrl;
+	struct mfc_data_seg data;	/* at least one data segment */
+};				/* size 8 dwords */
+
+struct mfcoib_rfci_tx_desc {
+	struct mfc_ctrl_seg ctrl;
+	struct mfc_datagram_seg dgram;
+	struct mfc_data_seg data;	/* at least one data segment */
+};				/* size 20 dwords */
+
+struct mfc_rx_desc {
+	struct mfc_data_seg data[0];
+};
+
+struct mfc_eth_addr_seg {
+	u8 static_rate;
+	u8 reserved1[3];
+	__be32 reserved2;
+	u8 reserved3[2];
+	u8 dmac[6];
+};
+
+struct mfc_init_seg {
+	u8 reserved1;
+	u8 pe;
+	u16 reserved;
+	u8 cs_ctl;
+	u8 seq_id_tx;
+	__be16 mtu;
+	u8 remote_fid[3];
+	u8 flags;
+	__be16 remote_exch;
+	__be16 local_exch_idx;
+};
+
+struct mfcoe_cmd_tx_desc {
+	struct mfc_ctrl_seg ctrl;
+	struct mfc_eth_addr_seg addr;
+	struct mfc_init_seg init;
+	struct mfc_data_seg data;
+};				/* 16 DWORDS, 64B */
+
+struct mfcoib_cmd_tx_desc {
+	struct mfc_ctrl_seg ctrl;
+	struct mfc_datagram_seg addr;
+	struct mfc_init_seg init;
+	struct mfc_data_seg data;
+};				/* 24 DWORDS, 96B */
+
+struct mfc_rx_thread {
+	int cpu;
+	struct task_struct *thread;
+	struct sk_buff_head rx_list;
+};
+
+static inline int mlx4_qp_to_reset(struct mlx4_dev *dev, struct mlx4_qp *qp)
+{
+	return mlx4_cmd(dev, 0, qp->qpn, 2,
+			MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
+}
+
+static inline int mlx4_qp_to_error(struct mlx4_dev *dev, struct mlx4_qp *qp)
+{
+	return mlx4_cmd(dev, 0, qp->qpn, 0,
+			MLX4_CMD_2ERR_QP, MLX4_CMD_TIME_CLASS_A);
+}
+
+#define mfc_bitmap_empty(bm)					\
+	(find_first_bit((bm)->addr, (bm)->size) >= (bm)->size)
+
+static inline int mfc_bitmap_alloc(struct mfc_bitmap *bitmap, unsigned size)
+{
+	bitmap->addr = kzalloc(sizeof(unsigned long) * BITS_TO_LONGS(size),
+			       GFP_KERNEL);
+	if (!bitmap->addr)
+		return -ENOMEM;
+
+	bitmap->size = size;
+	bitmap->last_bit = size - 1;
+
+	return 0;
+}
+
+static inline void mfc_bitmap_free(struct mfc_bitmap *bitmap)
+{
+	kfree(bitmap->addr);
+}
+
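+/*
+ * Allocate a free slot from the bitmap, scanning round-robin from the
+ * last allocated bit (or from zero when from_zero is set). Returns the
+ * slot number or -1 if the bitmap is full.
+ */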
+static inline int mfc_bitmap_slot_alloc(struct mfc_bitmap *bm, int from_zero)
+{
+	int slot_num, last_bit = bm->last_bit + 1;
+
+	if (from_zero)
+		last_bit = 0;
+	do {
+		slot_num = find_next_zero_bit(bm->addr, bm->size,
+					      last_bit % bm->size);
+		if (slot_num >= bm->size) {
+			slot_num = find_first_zero_bit(bm->addr, bm->size);
+			if (slot_num >= bm->size)
+				return -1;
+		}
+	} while (test_and_set_bit(slot_num, bm->addr));
+
+	bm->last_bit = slot_num;
+	return slot_num;
+}
+
+static inline void mfc_bitmap_slot_free(struct mfc_bitmap *bm, int slot_num)
+{
+	if (slot_num < 0 || slot_num >= bm->size) {
+		printk(KERN_WARNING
+		       "Error: trying to free out-of-bound slot number %d\n",
+		       slot_num);
+		return;
+	}
+	clear_bit(slot_num, bm->addr);
+}
+
+static inline char *mfc_bitmap_print(struct mfc_bitmap *bm)
+{
+#define BM_STR_BUF_LEN 1024
+	static char buf[BM_STR_BUF_LEN];
+	int i;
+	int len = 0;
+
+	len +=
+	    snprintf(buf + len, BM_STR_BUF_LEN - len, "size: %d, ", bm->size);
+
+	for (i = 0; i < BITS_TO_LONGS(bm->size); i++) {
+		len += snprintf(buf + len, BM_STR_BUF_LEN - len, "%08llx ",
+				cpu_to_be64(bm->addr[i]));
+	}
+
+	buf[len] = '\0';
+	return buf;
+}
+
+static inline void mfc_ring_db_rx(struct mfc_qp *fc_qp)
+{
+	struct mfc_queue *rq = &fc_qp->rq;
+
+	wmb();
+	*fc_qp->wqres.db.db = cpu_to_be32(rq->prod & 0xffff);
+	wmb();
+}
+
+extern int mfc_num_reserved_xids;
+extern int mfc_t11_mode;
+extern int mfc_debug_mode;
+
+extern int mfc_create_rfci(struct mfc_vhba *, struct mfc_rfci *, u64);
+extern int mfc_destroy_rfci(struct mfc_vhba *, struct mfc_rfci *);
+extern int mfc_init_rfci(struct mfc_vhba *, struct mfc_rfci *);
+extern int mfc_start_rfci_data(struct mfc_vhba *, u64);
+
+extern int mfc_init_port(struct mfc_dev *, int);
+extern void mfc_free_port(struct mfc_dev *, int);
+
+extern int mfc_create_vhba(struct mfc_port *, unsigned int, int, int,
+			   int, unsigned long, unsigned long, int, void *,
+			   const char *, u64, fcoib_send_els_cb,
+			   enum mfc_net_type, u64, u64);
+extern void mfc_remove_vhba(struct mfc_vhba *);
+
+extern int mfc_init_fcmd(struct mfc_vhba *);
+extern int mfc_reset_fcmd(struct mfc_vhba *);
+extern int mfc_create_fcmd(struct mfc_vhba *);
+extern void mfc_destroy_fcmd(struct mfc_vhba *);
+extern int mfc_post_rx_buf(struct mfc_dev *, struct mfc_qp *, void *, size_t);
+extern int mfc_q_init(struct mfc_queue *, u16, size_t, size_t);
+extern void mfc_q_destroy(struct mfc_queue *);
+extern void mfc_stamp_q(struct mfc_queue *);
+extern int flush_qp(struct mfc_dev *, struct mfc_qp *, int, int,
+		    struct mfc_cq *, struct mfc_exch *);
+extern int mfc_create_cq(struct mfc_vhba *, struct mfc_cq *, int, int, int,
+			 comp_fn, comp_fn, char *);
+extern void mfc_destroy_cq(struct mfc_cq *);
+extern void mfc_cq_clean(struct mfc_cq *);
+extern int mfc_flogi_finished(struct fc_lport *);
+extern void mfc_recv_flogi(struct fc_lport *, struct fc_frame *, u8 mc[6]);
+extern int mfc_reset_fexch(struct mfc_vhba *, struct mfc_exch *);
+extern int mfc_frame_send(struct fc_lport *, struct fc_frame *);
+extern int mfc_send_abort_tsk(struct mfc_exch *, u32);
+extern int mfc_queuecommand(struct scsi_cmnd *,
+			    void (*done) (struct scsi_cmnd *));
+
+extern void mfc_vhba_delete_dentry(struct mfc_vhba *);
+extern int mfc_vhba_create_dentry(struct mfc_vhba *);
+extern void mfc_port_delete_dentry(struct mfc_port *);
+extern int mfc_port_create_dentry(struct mfc_port *);
+
+#endif /* MFC_H */
diff --git a/drivers/scsi/mlx4_fc/mfc_exch.c b/drivers/scsi/mlx4_fc/mfc_exch.c
new file mode 100644
index 0000000..72eda55
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/mfc_exch.c
@@ -0,0 +1,1496 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/log2.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_fc.h>
+#include <scsi/scsi_tcq.h>
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/cq.h>
+#include <scsi/fc_frame.h>
+#include <scsi/fc/fc_fcp.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_fcoe.h>
+#include <scsi/libfc.h>
+
+#include "mfc.h"
+
+static enum mfc_cmd_io_dir scsi_dir_translate(enum dma_data_direction dmadir)
+{
+	switch (dmadir) {
+	case DMA_BIDIRECTIONAL:
+		return FCMD_IO_DIR_BIDI;
+	case DMA_FROM_DEVICE:
+		return FCMD_IO_DIR_READ;
+	case DMA_TO_DEVICE:
+		return FCMD_IO_DIR_WRITE;
+	case DMA_NONE:
+		return FCMD_IO_DIR_TARGET;
+	}
+	return -1;
+}
+
+static void mfc_cmd_tx_comp(struct mfc_vhba *vhba, struct mlx4_cqe *cqe)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_queue *sq = &vhba->fcmd.fc_qp.sq;
+	struct mfc_data_seg *data = NULL;
+	struct mfc_exch *fexch;
+	u64 dma;
+	u32 count;
+	unsigned long flags;
+	int is_err = 0, xno = 0;
+	int wqe_idx = be16_to_cpu(cqe->wqe_index) & sq->size_mask;
+
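+	/* opcode 0x1e in the CQE denotes an error (flush) completion */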
+	is_err = ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == 0x1e) ? 1 : 0;
+
+	if (is_err && vhba->fcmd.fc_qp.is_flushing)
+		dev_info(mfc_dev->dma_dev, "FCMD WQE %d flushed\n", wqe_idx);
+
+	if (vhba->net_type == NET_IB) {
+		struct mfcoib_cmd_tx_desc *tx_desc;
+
+		tx_desc = sq->buf + wqe_idx * FCMD_SQ_BB_SIZE;
+		data = &tx_desc->data;
+		xno = be16_to_cpu(tx_desc->init.local_exch_idx);
+	} else if (vhba->net_type == NET_ETH) {
+		struct mfcoe_cmd_tx_desc *tx_desc;
+
+		tx_desc = sq->buf + wqe_idx * FCMD_SQ_BB_SIZE;
+		data = &tx_desc->data;
+		xno = be16_to_cpu(tx_desc->init.local_exch_idx);
+	}
+
+	fexch = &vhba->fexch[xno];
+	fexch->tx_completed = 1;
+
+	dma = be64_to_cpu(data->addr);
+	count = be32_to_cpu(data->count);
+
+	pci_unmap_single(mfc_dev->dev->pdev, dma, count, PCI_DMA_TODEVICE);
+
+	spin_lock_irqsave(&sq->lock, flags);
+	sq->cons++;
+	spin_unlock_irqrestore(&sq->lock, flags);
+}
+
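+/*
+ * Map a SCSI scatterlist into one virtually-contiguous region through the
+ * exchange's FMR.  Only the first entry may start at a non page-aligned
+ * address and only the last may end unaligned; otherwise -EAGAIN/-EINVAL is
+ * returned and the caller falls back to a bounce buffer.
+ */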
+static int mfc_map_sg_to_fmr(struct mfc_dev *mfc_dev,
+			     struct scatterlist *sglist, int nents,
+			     struct mfc_exch *fexch,
+			     enum dma_data_direction dir)
+{
+	struct mlx4_fmr *fmr = &fexch->fmr;
+	struct scatterlist *sg;
+	int page_cnt, sg_cnt;
+	unsigned int total_len;
+	int i;
+	u64 fmr_page_mask = SHIFT_TO_MASK(fmr->page_shift);
+	u64 dma;
+	u64 page_list[MFC_MAX_FMR_PAGES];
+	unsigned int fmr_page_size = SHIFT_TO_SIZE(fmr->page_shift);
+	u32 rkey, lkey;
+	int rc = 0;
+
+	sg_cnt = pci_map_sg(mfc_dev->dev->pdev, sglist, nents, dir);
+
+	if (sg_dma_address(sglist) & ~fmr_page_mask) {
+		rc = -EAGAIN;
+		goto out_unmap;
+	}
+
+	page_cnt = 0;
+	total_len = 0;
+	for_each_sg(sglist, sg, sg_cnt, i) {
+		total_len += sg_dma_len(sg);
+
+		if (sg_dma_address(sg) & ~fmr_page_mask) {
+			if (i > 0) {
+				rc = -EINVAL;
+				goto out_unmap;
+			}
+		}
+		if ((sg_dma_address(sg) + sg_dma_len(sg)) & ~fmr_page_mask) {
+			if (i < sg_cnt - 1) {
+				rc = -EINVAL;
+				goto out_unmap;
+			}
+		}
+
+		for (dma = (sg_dma_address(sg) & fmr_page_mask);
+		     dma < sg_dma_address(sg) + sg_dma_len(sg);
+		     dma += fmr_page_size) {
+			if (page_cnt == fmr->max_pages) {
+				rc = -EINVAL;
+				goto out_unmap;
+			}
+
+			page_list[page_cnt] = dma;
+			++page_cnt;
+		}
+	}
+
+	rc = mlx4_map_phys_fmr_fbo(mfc_dev->dev, fmr, page_list, page_cnt, 0,
+				   sg_dma_address(sglist) & ~fmr_page_mask,
+				   total_len, &lkey, &rkey, 1);
+	if (rc) {
+		dev_err(mfc_dev->dma_dev, "Could not map FMR rc=%d\n", rc);
+		goto out_unmap;
+	}
+
+	return 0;
+
+out_unmap:
+	pci_unmap_sg(mfc_dev->dev->pdev, sglist, nents, dir);
+	return rc;
+}
+
+#define BOUNCESIZE 16384
+
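+/*
+ * Fallback path for scatterlists the FMR cannot map directly: stage the data
+ * through a single contiguous bounce buffer (writes are copied in here,
+ * reads are copied back in mfc_destroy_bounce()).
+ */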
+static int mfc_create_bounce(struct mfc_dev *mfc_dev,
+			     struct scsi_cmnd *cmd, struct mfc_exch *fexch)
+{
+	void *bouncebuff;
+	int bounceoff = fexch->bounce_off;
+	unsigned int total_len;
+	struct scatterlist *sg;
+	void *page_addr;
+	int i;
+
+	if (scsi_bufflen(cmd) > BOUNCESIZE - bounceoff)
+		return -ENOMEM;
+
+	bouncebuff = kmalloc(BOUNCESIZE, GFP_ATOMIC);
+	if (!bouncebuff)
+		return -ENOMEM;
+
+	total_len = 0;
+	scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
+		if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+			page_addr = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
+			memcpy(bouncebuff + bounceoff + total_len,
+			       page_addr + (sg->offset & ~PAGE_MASK),
+			       sg->length);
+			kunmap_atomic(page_addr, KM_SOFTIRQ0);
+		}
+		total_len += sg->length;
+	}
+	sg_init_one(&fexch->bounce_sg[0], bouncebuff + bounceoff,
+		    scsi_bufflen(cmd));
+	fexch->bouncebuff = bouncebuff;
+
+	return 0;
+}
+
+static int mfc_map_fmr(struct mfc_dev *mfc_dev, struct scsi_cmnd *cmd,
+		       struct mfc_exch *fexch)
+{
+	int err;
+
+	if (cmd->sc_data_direction == DMA_NONE)
+		return 0;
+
+	if (cmd->sc_data_direction == DMA_BIDIRECTIONAL) {
+		dev_err(mfc_dev->dma_dev, "BIDI SCSI commands not supported\n");
+		return -EINVAL;
+	}
+
+	fexch->bouncebuff = NULL;
+	fexch->bounce_off = 0;
+	err = mfc_map_sg_to_fmr(mfc_dev,
+				scsi_sglist(cmd), scsi_sg_count(cmd),
+				fexch, cmd->sc_data_direction);
+	if ((err == -EAGAIN) || (err == -EINVAL)) {
+		err = mfc_create_bounce(mfc_dev, cmd, fexch);
+		if (err)
+			return err;
+		err = mfc_map_sg_to_fmr(mfc_dev, &fexch->bounce_sg[0], 1,
+					fexch, cmd->sc_data_direction);
+	}
+
+	return err;
+}
+
+void mfc_rx_fcp_resp(struct mfc_vhba *vhba, struct fcp_resp *fc_rp,
+		     struct scsi_cmnd *scmd, size_t xfer_len,
+		     struct mlx4_fmr *fmr, u32 xno)
+{
+	enum dma_data_direction data_dir;
+	u8 cdb_op;
+	struct fcp_resp_ext *rp_ex;
+	struct fcp_resp_rsp_info *fc_rp_info;
+	u32 respl = 0;
+	u32 snsl = 0;
+	u32 scsi_resid;
+	u8 cdb_status;
+	unsigned data_len = scsi_bufflen(scmd);
+
+	/* things from openfc_scsi_rcv(), RESPONSE branch */
+	cdb_status = fc_rp->fr_status;
+	if ((fc_rp->fr_flags == 0) && (fc_rp->fr_status == 0)) {
+		data_dir = scmd->sc_data_direction;
+		cdb_op = scmd->cmnd[0];
+		if (data_dir == DMA_FROM_DEVICE) {
+			if (data_len > xfer_len) {
+				if (cdb_op != READ_10 &&
+				    cdb_op != READ_6 &&
+				    cdb_op != WRITE_10 && cdb_op != WRITE_6) {
+					scmd->result = DID_IMM_RETRY << 16;
+				} else {
+					if (cdb_status == 0) {
+						scmd->result =
+						    (DID_OK << 16) | cdb_status;
+					} else {
+						scmd->SCp.buffers_residual =
+						    scsi_resid;
+						scmd->result =
+						    (DID_ERROR << 16) |
+						    cdb_status;
+					}
+				}
+			} else if (data_len < xfer_len) {
+				if (cdb_op != READ_10 &&
+				    cdb_op != READ_6 &&
+				    cdb_op != WRITE_10 && cdb_op != WRITE_6) {
+					scmd->result = DID_IMM_RETRY << 16;
+				} else {
+					scmd->result =
+					    (DID_ERROR << 16) | cdb_status;
+				}
+			} else
+				scmd->result = (DID_OK << 16);
+		}
+	} else {
+		rp_ex = (void *)(fc_rp + 1);
+		fc_rp_info = (struct fcp_resp_rsp_info *)(rp_ex + 1);
+		if (fc_rp->fr_flags & FCP_RSP_LEN_VAL) {
+			respl = ntohl(rp_ex->fr_rsp_len);
+			if ((respl != 0 && respl != 4 && respl != 8) ||
+			    (fc_rp_info->rsp_code != FCP_TMF_CMPL)) {
+				scmd->result = (DID_ERROR << 16);
+			}
+		}
+		if (fc_rp->fr_flags & FCP_SNS_LEN_VAL) {
+			snsl = ntohl(rp_ex->fr_sns_len);
+			if (snsl > SCSI_SENSE_BUFFERSIZE)
+				snsl = SCSI_SENSE_BUFFERSIZE;
+			memcpy(scmd->sense_buffer,
+			       &fc_rp_info->_fr_resvd[0] + respl, snsl);
+		}
+		if (fc_rp->fr_flags & FCP_RESID_UNDER) {
+			scsi_resid = ntohl(rp_ex->fr_resid);
+			/*
+			 * The cmnd->underflow is the minimum number of
+			 * bytes that must be transfered for this
+			 * command.  Provided a sense condition is not
+			 * present, make sure the actual amount
+			 * transferred is at least the underflow value
+			 * or fail.
+			 */
+			if (!(fc_rp->fr_flags & FCP_SNS_LEN_VAL) &&
+			    (fc_rp->fr_status == 0) &&
+			    (scsi_bufflen(scmd) -
+			     scsi_resid) < scmd->underflow) {
+				scmd->result = (DID_ERROR << 16);
+			}
+		} else if (fc_rp->fr_flags & FCP_RESID_OVER) {
+			scmd->result = (DID_ERROR << 16);
+		}
+	}
+}
+
+static void mfc_unmap_fmr_sg(struct mfc_dev *mfc_dev,
+			     struct scatterlist *sglist, int nents,
+			     struct mlx4_fmr *fmr, enum dma_data_direction dir)
+{
+	u32 dummy_lkey, dummy_rkey;
+
+	pci_unmap_sg(mfc_dev->dev->pdev, sglist, nents, dir);
+	mlx4_fmr_unmap(mfc_dev->dev, fmr, &dummy_lkey, &dummy_rkey);
+}
+
+static void mfc_destroy_bounce(struct mfc_dev *mfc_dev, struct scsi_cmnd *cmd,
+			       struct mfc_exch *fexch)
+{
+	struct scatterlist *sg;
+	int i;
+	unsigned long total_len;
+	char *page_addr;
+
+	if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
+		total_len = 0;
+		scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
+			page_addr = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
+			memcpy(page_addr + (sg->offset & ~PAGE_MASK),
+			       fexch->bouncebuff + fexch->bounce_off +
+			       total_len, sg->length);
+			kunmap_atomic(page_addr, KM_SOFTIRQ0);
+			total_len += sg->length;
+		}
+	}
+
+	kfree(fexch->bouncebuff);
+	fexch->bouncebuff = NULL;
+}
+
+static void mfc_unmap_fmr(struct mfc_dev *mfc_dev, struct scsi_cmnd *cmd,
+			  struct mfc_exch *fexch)
+{
+	if ((cmd->sc_data_direction == DMA_NONE) ||
+	    (cmd->sc_data_direction == DMA_BIDIRECTIONAL))
+		return;
+
+	if (fexch->bouncebuff) {
+		mfc_unmap_fmr_sg(mfc_dev, &fexch->bounce_sg[0], 1,
+				 &fexch->fmr, cmd->sc_data_direction);
+		mfc_destroy_bounce(mfc_dev, cmd, fexch);
+	} else
+		mfc_unmap_fmr_sg(mfc_dev, scsi_sglist(cmd),
+				 scsi_sg_count(cmd), &fexch->fmr,
+				 cmd->sc_data_direction);
+}
+
+/*
+ * FEXCH completion - pay attention: ethernet header is stripped.
+ */
+static void mfc_exch_rx_comp(struct mfc_vhba *vhba, struct mlx4_cqe *g_cqe)
+{
+	struct mfc_exch_cqe *cqe = (struct mfc_exch_cqe *)g_cqe;
+	struct mfc_exch *fexch;
+	struct mfc_queue *rq;
+	int wqe_idx;
+	struct mfc_rx_desc *rx_desc;
+	int xno;
+	u32 qpn;
+	unsigned long flags;
+	struct fcp_resp *fc_rp;
+	size_t rxcnt;
+	u_int hlen;
+
+	qpn = be32_to_cpu(cqe->my_qpn) & ((1 << 24) - 1);
+	xno = qpn - vhba->base_fexch_qpn;
+	fexch = &vhba->fexch[xno];
+
+	rq = &fexch->fc_qp.rq;
+
+	wqe_idx = be16_to_cpu(cqe->wqe_index) & rq->size_mask;
+	rx_desc = rq->buf + (wqe_idx * rq->stride);
+
+	pci_unmap_single(vhba->mfc_port->mfc_dev->dev->pdev,
+			 be64_to_cpu(rx_desc->data[0].addr),
+			 be32_to_cpu(rx_desc->data[0].count),
+			 PCI_DMA_FROMDEVICE);
+
+	if (fexch->state == FEXCH_ABORT || fexch->state == FEXCH_SEND_ABORT) {
+		fexch->scmd->result = (DID_ABORT << 16);
+		fexch->state = FEXCH_CMD_DONE;
+		if ((g_cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != 0x1e)
+			complete(&fexch->tm_done);
+		goto out_cons;
+	}
+
+	if ((g_cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == 0x1e)
+		fexch->scmd->result = (DID_ERROR << 16);
+	else {
+		rxcnt = be32_to_cpu(cqe->rcv_byte_count);
+
+		if (!mfc_t11_mode)
+			hlen = sizeof(struct fcoe_hdr_old);
+		else
+			hlen = sizeof(struct fcoe_hdr);
+
+		fc_rp = (struct fcp_resp *)(fexch->response_buf + 2 +
+					    hlen + 24);
+
+		mfc_rx_fcp_resp(vhba, fc_rp, fexch->scmd, rxcnt,
+				&fexch->fmr, xno);
+	}
+
+	spin_lock_irqsave(fexch->scmd->device->host->host_lock, flags);
+
+	mfc_unmap_fmr(vhba->mfc_port->mfc_dev, fexch->scmd, fexch);
+
+	fexch->scmd->scsi_done(fexch->scmd);
+
+	if (!fexch->fc_qp.is_flushing || vhba->fcmd.fc_qp.is_flushing)
+		mfc_bitmap_slot_free(&vhba->fexch_bm, xno);
+
+	spin_unlock_irqrestore(fexch->scmd->device->host->host_lock, flags);
+
+out_cons:
+	spin_lock_irqsave(&rq->lock, flags);
+	rq->cons++;
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void mfc_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
+{
+	printk(KERN_WARNING "qp event for qpn=0x%08x event_type=0x%x\n",
+	       qp->qpn, type);
+}
+
+static int mfc_create_fexch(struct mfc_vhba *vhba, int xno)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_exch *fexch = &vhba->fexch[xno];
+	struct mfc_qp *qp = &fexch->fc_qp;
+	struct mfc_queue *sq = &qp->sq;
+	struct mfc_queue *rq = &qp->rq;
+	int err = 0;
+
+	fexch->vhba = vhba;
+	mfc_q_init(sq, FEXCH_SQ_BB_SIZE, FEXCH_SQ_NUM_BBS, 0);
+	mfc_q_init(rq, FEXCH_RQ_WQE_SIZE, FEXCH_RQ_NUM_WQES, 0);
+
+	qp->buf_size = (sq->size * sq->stride) + (rq->size * rq->stride);
+
+	err = mlx4_alloc_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size,
+				 qp->buf_size);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate man for FEXCH %d\n", xno);
+		goto err_free_rxinfo;
+	}
+
+	if (FEXCH_SQ_BB_SIZE >= FEXCH_RQ_WQE_SIZE) {
+		sq->buf = qp->wqres.buf.direct.buf;
+		rq->buf = sq->buf + (sq->size * sq->stride);
+	} else {
+		rq->buf = qp->wqres.buf.direct.buf;
+		sq->buf = rq->buf + (rq->size * rq->stride);
+	}
+
+	*qp->wqres.db.db = 0;
+
+	mfc_stamp_q(sq);
+	mfc_stamp_q(rq);
+
+	err = mlx4_qp_alloc(mfc_dev->dev, vhba->base_fexch_qpn + xno, &qp->mqp);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate QP number 0x%x\n", qp->mqp.qpn);
+		goto err_free_man;
+	}
+
+	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+	qp->mqp.event = mfc_qp_event;
+
+	err = mlx4_fmr_alloc_reserved(mfc_dev->dev, vhba->base_fexch_mpt + xno,
+				      mfc_dev->priv_pdn |
+				      MLX4_MPT_ENABLE_INVALIDATE,
+				      MLX4_PERM_REMOTE_WRITE |
+				      MLX4_PERM_REMOTE_READ,
+				      MFC_MAX_FMR_PAGES, 1,
+				      MFC_FMR_PAGE_SHIFT, &fexch->fmr);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate FMR for fexch %d, err=%d\n", xno,
+			err);
+		goto err_free_qp;
+	}
+
+	err = mlx4_fmr_enable(mfc_dev->dev, &fexch->fmr);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not enable FMR for fexch %d, err=%d\n", xno,
+			err);
+		goto err_free_fmr;
+	}
+
+	return 0;
+
+err_free_fmr:
+	mlx4_fmr_free_reserved(mfc_dev->dev, &fexch->fmr);
+err_free_qp:
+	mlx4_qp_remove(mfc_dev->dev, &fexch->fc_qp.mqp);
+	mlx4_qp_free(mfc_dev->dev, &fexch->fc_qp.mqp);
+err_free_man:
+	mlx4_free_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size);
+err_free_rxinfo:
+	mfc_q_destroy(rq);
+	return err;
+}
+
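+/*
+ * Poll a work queue until all but one outstanding WQE has completed, giving
+ * up after roughly 20 seconds.  Returns 0 on success, 1 on timeout.
+ */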
+static int wait_for_q_comp(struct mfc_queue *q)
+{
+	unsigned long end;
+	unsigned long flags;
+	int err;
+
+	end = jiffies + 20 * HZ;
+	spin_lock_irqsave(&q->lock, flags);
+	while ((int)(q->prod - q->cons) > 1) {
+		spin_unlock_irqrestore(&q->lock, flags);
+		msleep(1000 / HZ);
+		if (time_after(jiffies, end))
+			break;
+		spin_lock_irqsave(&q->lock, flags);
+	}
+
+	if ((int)(q->prod - q->cons) > 1)
+		err = 1;
+	else
+		err = 0;
+
+	spin_unlock_irqrestore(&q->lock, flags);
+
+	return err;
+}
+
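+/* Serializes manual CQ polling between the flush/teardown and I/O paths */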
+static DEFINE_SPINLOCK(cq_poll);
+
+int wait_for_fexch_tx_comp(struct mfc_exch *fexch, struct mfc_cq *cq)
+{
+	int err;
+	unsigned long end;
+	unsigned long flags;
+
+	end = jiffies + 20 * HZ;
+	while (!fexch->tx_completed) {
+		if (spin_trylock_irqsave(&cq_poll, flags)) {
+			mfc_cq_clean(cq);
+			spin_unlock_irqrestore(&cq_poll, flags);
+		}
+
+		msleep(1000 / HZ);
+
+		if (time_after(jiffies, end))
+			break;
+	}
+	if (!fexch->tx_completed)
+		err = 1;
+	else
+		err = 0;
+
+	return err;
+}
+
+int flush_qp(struct mfc_dev *mfc_dev, struct mfc_qp *qp, int is_sq,
+	     int is_rq, struct mfc_cq *cq, struct mfc_exch *fexch)
+{
+	struct mfc_queue *sq = &qp->sq;
+	struct mfc_queue *rq = &qp->rq;
+	unsigned long flags;
+	int err = 0;
+
+	qp->is_flushing = 1;
+
+	err = mlx4_qp_to_error(mfc_dev->dev, &qp->mqp);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Error %d bringing QP to error state, qpn=0x%x\n",
+			err, qp->mqp.qpn);
+		return err;
+	}
+
+	/* if sq in use (FCMD, RFCI), wait for sq flush */
+	if (is_sq) {
+		if (cq)
+			if (spin_trylock_irqsave(&cq_poll, flags)) {
+				mfc_cq_clean(cq);
+				spin_unlock_irqrestore(&cq_poll, flags);
+			}
+
+		err = wait_for_q_comp(sq);
+		if (err)
+			dev_err(mfc_dev->dma_dev,
+				"Error %d send q was not flushed after error\n",
+				err);
+	}
+
+	/* if rq in use (FEXCH, RFCI), wait for rq flush */
+	if (is_rq) {
+		if (cq) {
+			if (spin_trylock_irqsave(&cq_poll, flags)) {
+				mfc_cq_clean(cq);
+				spin_unlock_irqrestore(&cq_poll, flags);
+			}
+		}
+		if (fexch && !fexch->tx_completed) {
+			err = wait_for_fexch_tx_comp(fexch, cq);
+			if (err) {
+				dev_err(mfc_dev->dma_dev,
+					"ERROR: %d FCMD TX did not completed\n",
+					err);
+				return err;
+			}
+		}
+
+		err = wait_for_q_comp(rq);
+		if (err)
+			dev_err(mfc_dev->dma_dev,
+				"Error rq was not flushed after error %d\n",
+				err);
+
+	}
+
+	return err;
+}
+
+static int mfc_destroy_fexch(struct mfc_vhba *vhba, int xno)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_exch *fexch = &vhba->fexch[xno];
+	struct mfc_qp *qp = &fexch->fc_qp;
+	struct mfc_queue *rq = &qp->rq;
+	struct mfc_queue *sq = &qp->sq;
+	int err = 0;
+
+	if (qp->is_created) {
+		err = flush_qp(mfc_dev, qp, 0, 1, &vhba->fcmd.fc_cq, fexch);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"error flushing fexch qp, try host reset.\n");
+			goto out;
+		}
+	}
+
+	mlx4_fmr_free_reserved(mfc_dev->dev, &fexch->fmr);
+	if (qp->is_created)
+		mlx4_qp_to_reset(mfc_dev->dev, &qp->mqp);
+	qp->is_created = 0;
+	mlx4_qp_remove(mfc_dev->dev, &qp->mqp);
+	mlx4_qp_free(mfc_dev->dev, &qp->mqp);
+	mlx4_free_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size);
+	mfc_q_destroy(rq);
+	mfc_q_destroy(sq);
+out:
+	return err;
+}
+
+int mfc_init_fexch(struct mfc_vhba *vhba, int xno)
+{
+	struct mfc_dev *mfc_dev = vhba->mfc_port->mfc_dev;
+	struct mfc_exch *fexch = &vhba->fexch[xno];
+	struct mfc_qp *qp = &fexch->fc_qp;
+	enum mlx4_qp_state qp_state = MLX4_QP_STATE_RST;
+	int err = 0;
+	u8 sched_q = 0;
+	struct mlx4_qp_context context;
+
+	if (vhba->net_type == NET_IB)
+		sched_q = 0x83 |
+		    (vhba->dest_ib_sl & 0xf) << 3 |
+		    (vhba->mfc_port->port - 1) << 6;
+	else if (vhba->net_type == NET_ETH)
+		sched_q = 0x83 |
+		    vhba->fc_vlan_prio << 3 | (vhba->mfc_port->port - 1) << 6;
+
+	context = (struct mlx4_qp_context) {
+		.flags = cpu_to_be32(QPC_SERVICE_TYPE_FEXCH << 16),
+		.pd = cpu_to_be32(mfc_dev->priv_pdn),
+		/* Raw-ETH requirement */
+		.mtu_msgmax = 0x77,
+		/* this means SQ_NUM_BBS=1, and SQ_BB_SIZE=1 */
+		.sq_size_stride = 0,
+		.rq_size_stride = ilog2(FEXCH_RQ_NUM_WQES) << 3 |
+				  ilog2(FEXCH_RQ_WQE_SIZE >> 4),
+		.usr_page = cpu_to_be32(mfc_dev->priv_uar.index),
+		.local_qpn = cpu_to_be32(qp->mqp.qpn),
+		.pri_path.sched_queue = sched_q,
+		.pri_path.counter_index = 0xff,
+		.pri_path.ackto = (vhba->net_type == NET_IB) ?
+			MLX4_LINK_TYPE_IB : MLX4_LINK_TYPE_ETH,
+		/* Source MAC index */
+		.pri_path.grh_mylmc =  (vhba->net_type == NET_IB) ?
+				       0 : vhba->rfci[RFCI_DATA].fc_mac_idx,
+		.params2 = cpu_to_be32((qp->wqres.buf.direct.map &
+					(PAGE_SIZE - 1)) & 0xfc0),
+		.cqn_send = cpu_to_be32(
+			vhba->fexch_cq[xno % num_online_cpus()].mcq.cqn),
+		.cqn_recv = cpu_to_be32(
+			vhba->fexch_cq[xno % num_online_cpus()].mcq.cqn),
+		.db_rec_addr = cpu_to_be64(qp->wqres.db.dma),
+		.srqn = 0,
+		.my_fc_id_idx = vhba->idx,
+		.qkey = cpu_to_be32(MLX4_FCOIB_QKEY),
+	};
+
+	fexch->tx_completed = 1;
+	if (vhba->fc_vlan_id != -1) {
+		context.pri_path.fl = 0x40;
+		context.pri_path.vlan_index = vhba->fc_vlan_idx;
+	}
+
+	err = mlx4_qp_to_ready(mfc_dev->dev, &qp->wqres.mtt, &context, &qp->mqp,
+			       &qp_state);
+
+	if (qp_state != MLX4_QP_STATE_RST)
+		qp->is_created = 1;
+
+	if (qp_state != MLX4_QP_STATE_RTS) {
+		dev_err(mfc_dev->dma_dev,
+			"Error bringing FEXCH %d QP to RTS state, qpn=0x%x\n",
+			xno, qp->mqp.qpn);
+		return err;
+	}
+
+	fexch->fc_qp.is_flushing = 0;
+
+	return 0;
+}
+
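+/*
+ * Build a BLS ABTS frame header in-place: basic link service, class 3,
+ * EOF_T, with any pad bytes reflected in F_CTL when the frame length is
+ * not a multiple of 4.
+ */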
+int mfc_fill_abort_hdr(struct fc_frame *fp, u32 did, u32 sid,
+		       u16 ox_id, u8 seq_id)
+{
+
+	struct fc_frame_header *fh;
+	u16 fill;
+
+	/* Fill header */
+	fh = fc_frame_header_get(fp);
+	fh->fh_r_ctl = FC_RCTL_BA_ABTS;
+	hton24(fh->fh_d_id, did);
+	hton24(fh->fh_s_id, sid);
+	fh->fh_type = FC_TYPE_BLS;
+	hton24(fh->fh_f_ctl, FC_FC_END_SEQ | FC_FC_SEQ_INIT);
+	fh->fh_cs_ctl = 0;
+	fh->fh_df_ctl = 0;
+	fh->fh_ox_id = htons(ox_id);
+	fh->fh_rx_id = htons(FC_XID_UNKNOWN);
+	fh->fh_seq_id = seq_id;
+	fh->fh_seq_cnt = 0;
+	fh->fh_parm_offset = htonl(0);
+
+	/* Fill SOF and EOF */
+	fr_sof(fp) = FC_SOF_I3;	/* resume class 3 */
+	fr_eof(fp) = FC_EOF_T;
+
+	fill = fr_len(fp) & 3;
+	if (fill) {
+		fill = 4 - fill;
+		/* TODO, this may be a problem with fragmented skb */
+		skb_put(fp_skb(fp), fill);
+		hton24(fh->fh_f_ctl, ntoh24(fh->fh_f_ctl) | fill);
+	}
+
+	return 0;
+}
+
+int mfc_send_abort_tsk(struct mfc_exch *fexch, u32 rport_id)
+{
+	struct fc_frame *fp;
+	struct fc_lport *lp;
+	struct mfc_vhba *vhba = fexch->vhba;
+	int ox_id, err = 0, xno;
+
+	/* check we can use rfci */
+	if (vhba->lp->state != LPORT_ST_READY || fexch->fc_qp.is_flushing)
+		return -EINVAL;
+
+	/* Send abort packet via rfci */
+	xno = fexch - vhba->fexch;
+	ox_id = vhba->base_fexch_qpn + xno - vhba->mfc_port->base_fexch_qpn;
+	lp = vhba->lp;
+	fp = fc_frame_alloc(lp, 0);
+	if (fp) {
+		dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+			 "Sending ABTS for 0x%x fexch\n", xno);
+
+		/* TODO: find out if seq_id = 0 is OK */
+		mfc_fill_abort_hdr(fp, rport_id,
+				   fc_host_port_id(lp->host), ox_id, 0);
+		err = mfc_frame_send(lp, fp);
+	} else {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"Send ABTS for fexch[0x%x] ox_id 0x%x - NOT DONE!\n",
+			xno, ox_id);
+		err = -ENOMEM;
+	}
+
+	return err;
+}
+
+/*
+ * re-init and free fexch bitmap, fexch should be ready for reuse.
+ */
+int mfc_reset_fexch(struct mfc_vhba *vhba, struct mfc_exch *fexch)
+{
+	int err = 0, xno;
+
+	mfc_unmap_fmr(vhba->mfc_port->mfc_dev, fexch->scmd, fexch);
+
+	xno = fexch - vhba->fexch;
+
+	err = mfc_destroy_fexch(vhba, xno);
+	if (err) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"fail to destroy fexch 0x%x\n", xno);
+		goto out;
+	}
+
+	err = mfc_create_fexch(vhba, xno);
+	if (err) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"fail to recreate fexch 0x%x\n", xno);
+		goto out;
+	}
+
+	err = mfc_init_fexch(vhba, xno);
+	if (err) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"fail to init fexch 0x%x\n", xno);
+		mfc_destroy_fexch(vhba, xno);
+		goto out;
+	}
+
+	fexch->state = FEXCH_OK;
+	mfc_bitmap_slot_free(&vhba->fexch_bm, xno);
+out:
+	return err;
+
+}
+
+/*
+ * Attention: This function could be called from interrupt context
+ */
+int mfc_create_fcmd(struct mfc_vhba *vhba)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_qp *qp = &vhba->fcmd.fc_qp;
+	struct mfc_queue *sq = &qp->sq;
+	struct mfc_queue *rq = &qp->rq;
+	int err = 0;
+	int i, eqidx, cpu;
+
+	err = mfc_q_init(sq, FCMD_SQ_BB_SIZE, vhba->num_fexch,
+			 sizeof(struct fcp_cmnd *));
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not alloc info for fcmd sq\n",
+			fc_port->port, vhba->idx);
+		goto err_out;
+	}
+
+	for (i = 0; i < sq->size; i++) {
+		struct fcp_cmnd *cdb_cmd;
+
+		cdb_cmd = mfc_q_info_get(sq, i, struct fcp_cmnd *) =
+			kzalloc(sizeof(struct fcp_cmnd), GFP_KERNEL);
+		if (!cdb_cmd)
+			goto err_free_txinfo;
+	}
+
+	err = mfc_q_init(rq, FCMD_RQ_WQE_SIZE, FCMD_RQ_NUM_WQES, 0);
+	if (err) {
+		dev_err(mfc_dev->dma_dev, "Error initializing fcmd rq\n");
+		goto err_free_txinfo;
+	}
+
+	qp->buf_size = (sq->size * sq->stride) + (rq->size * rq->stride);
+
+	err = mlx4_alloc_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size,
+				 qp->buf_size);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not allocate fcmd, err=%d\n",
+			fc_port->port, vhba->idx, err);
+		goto err_free_txinfo;
+	}
+
+	if (FCMD_SQ_BB_SIZE >= FCMD_RQ_WQE_SIZE) {
+		sq->buf = qp->wqres.buf.direct.buf;
+		rq->buf = sq->buf + (sq->size * sq->stride);
+	} else {
+		rq->buf = qp->wqres.buf.direct.buf;
+		sq->buf = rq->buf + (rq->size * rq->stride);
+	}
+
+	*qp->wqres.db.db = 0;
+
+	mfc_stamp_q(sq);
+	mfc_stamp_q(rq);
+
+	err = mlx4_qp_reserve_range(mfc_dev->dev, 1, 1, &qp->mqp.qpn);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not resv QPN for fcmd, err=%d\n",
+			fc_port->port, vhba->idx, err);
+		goto err_free_man;
+	}
+
+	err = mlx4_qp_alloc(mfc_dev->dev, qp->mqp.qpn, &qp->mqp);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Could not allocate QPN 0x%x\n",
+			fc_port->port, vhba->idx, qp->mqp.qpn);
+		goto err_release_qp;
+	}
+
+	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+	qp->mqp.event = mfc_qp_event;
+
+	err = mfc_create_cq(vhba, &vhba->fcmd.fc_cq, vhba->num_fexch,
+			    MLX4_LEAST_ATTACHED_VECTOR, 0,
+			    NULL, mfc_cmd_tx_comp, "FCMD");
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Failed creating FCMD CQ, err=%d\n",
+			fc_port->port, vhba->idx, err);
+		goto err_free_qp;
+	}
+
+	/* Create FEXCHs for this FCMD */
+	vhba->fexch = vmalloc(vhba->num_fexch * sizeof(struct mfc_exch));
+	if (!vhba->fexch) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Error allocating fexch array\n",
+			fc_port->port, vhba->idx);
+		goto err_free_cq;
+
+	}
+	memset(vhba->fexch, 0, vhba->num_fexch * sizeof(struct mfc_exch));
+	for (i = 0; i < vhba->num_fexch; i++) {
+		vhba->fexch[i].response_buf =
+		    kmalloc(MFC_CMD_RX_SKB_BUFSIZE, GFP_KERNEL);
+		if (!vhba->fexch[i].response_buf) {
+			dev_err(mfc_dev->dma_dev,
+				"port%d vhba%d fexch %d: Error allocating\n",
+				fc_port->port, vhba->idx, i);
+			goto err_free_fexch_arr;
+		}
+	}
+
+	err = mfc_bitmap_alloc(&vhba->fexch_bm, vhba->num_fexch);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"port%d vhba%d: Error allocating fexch bitmap for\n",
+			fc_port->port, vhba->idx);
+		goto err_free_fexch_arr;
+	}
+
+	for (i = 0; i < mfc_num_reserved_xids; ++i)
+		set_bit(i, vhba->fexch_bm.addr);
+
+	vhba->base_reserved_xid =
+	    vhba->base_fexch_qpn - fc_port->base_fexch_qpn;
+	vhba->num_reserved_xid = mfc_num_reserved_xids;
+
+	eqidx = 0;
+	for_each_online_cpu(cpu) {
+		err = mfc_create_cq(vhba, &vhba->fexch_cq[eqidx],
+				    vhba->num_fexch / num_online_cpus(),
+				    (eqidx % num_online_cpus()) %
+				     mfc_dev->dev->caps.num_comp_vectors,
+				     1, mfc_exch_rx_comp, NULL, "FEXCH");
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"port%d vhba%d: Failed creating CQ %d err=%d\n",
+				fc_port->port, vhba->idx, eqidx, err);
+			goto err_destroy_fexch_cq;
+		}
+
+		++eqidx;
+	}
+
+	for (i = 0; i < vhba->num_fexch; i++) {
+		err = mfc_create_fexch(vhba, i);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"Fail to create FEXCH %d err=%d\n", i, err);
+			goto err_destroy_fexch;
+		}
+	}
+
+	return 0;
+
+err_destroy_fexch:
+	while (--i >= 0)
+		mfc_destroy_fexch(vhba, i);
+err_destroy_fexch_cq:
+	while (--eqidx >= 0)
+		mfc_destroy_cq(&vhba->fexch_cq[eqidx]);
+	mfc_bitmap_free(&vhba->fexch_bm);
+err_free_fexch_arr:
+	for (i = 0; i < vhba->num_fexch; i++) {
+		if (!vhba->fexch[i].response_buf)
+			break;
+		kfree(vhba->fexch[i].response_buf);
+	}
+	vfree(vhba->fexch);
+err_free_cq:
+	mfc_destroy_cq(&vhba->fcmd.fc_cq);
+err_free_qp:
+	mlx4_qp_remove(mfc_dev->dev, &qp->mqp);
+	mlx4_qp_free(mfc_dev->dev, &qp->mqp);
+err_release_qp:
+	mlx4_qp_release_range(mfc_dev->dev, qp->mqp.qpn, 1);
+err_free_man:
+	mlx4_free_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size);
+err_free_txinfo:
+	for (i = 0; i < sq->size; i++) {
+		if (!mfc_q_info_get(sq, i, struct fcp_cmnd *))
+			break;
+		kfree(mfc_q_info_get(sq, i, struct fcp_cmnd *));
+	}
+	mfc_q_destroy(sq);
+err_out:
+	return err;
+}
+
+void mfc_destroy_fcmd(struct mfc_vhba *vhba)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_qp *qp = &vhba->fcmd.fc_qp;
+	struct mfc_queue *sq = &qp->sq;
+	struct mfc_queue *rq = &qp->rq;
+	int err = 0;
+	int i;
+
+	if (qp->is_created) {
+		err = flush_qp(mfc_dev, qp, 1, 0, &vhba->fcmd.fc_cq, NULL);
+		if (err)
+			dev_err(mfc_dev->dma_dev,
+				"Error flushing FCMD qp err=%d\n", err);
+
+	}
+
+	for (i = 0; i < vhba->num_fexch; ++i)
+		mfc_destroy_fexch(vhba, i);
+
+	for (i = 0; i < num_online_cpus(); ++i)
+		mfc_destroy_cq(&vhba->fexch_cq[i]);
+
+	for (i = 0; i < vhba->num_reserved_xid; ++i)
+		clear_bit(i, vhba->fexch_bm.addr);
+
+	if (!mfc_bitmap_empty(&vhba->fexch_bm))
+		dev_warn(mfc_dev->dma_dev,
+			 "uncompleted exchanges while destroying FCMD: %s\n",
+			 mfc_bitmap_print(&vhba->fexch_bm));
+
+	mfc_bitmap_free(&vhba->fexch_bm);
+
+	for (i = 0; i < vhba->num_fexch; i++) {
+		if (!vhba->fexch[i].response_buf)
+			break;
+		kfree(vhba->fexch[i].response_buf);
+	}
+	vfree(vhba->fexch);
+
+	mfc_destroy_cq(&vhba->fcmd.fc_cq);
+	if (qp->is_created)
+		mlx4_qp_to_reset(mfc_dev->dev, &qp->mqp);
+	qp->is_created = 0;
+	mlx4_qp_remove(mfc_dev->dev, &qp->mqp);
+	mlx4_qp_free(mfc_dev->dev, &qp->mqp);
+	mlx4_qp_release_range(mfc_dev->dev, vhba->fcmd.fc_qp.mqp.qpn, 1);
+	mlx4_free_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size);
+
+	for (i = 0; i < sq->size; i++) {
+		if (!mfc_q_info_get(sq, i, struct fcp_cmnd *))
+			break;
+		kfree(mfc_q_info_get(sq, i, struct fcp_cmnd *));
+	}
+	mfc_q_destroy(sq);
+	mfc_q_destroy(rq);
+}
+
+int mfc_reset_fcmd(struct mfc_vhba *vhba)
+{
+	int err = 0;
+
+	mfc_destroy_fcmd(vhba);
+	err = mfc_create_fcmd(vhba);
+	if (err)
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"FAIL to create FCMD err=%d\n", err);
+
+	return err;
+}
+
+int mfc_init_fcmd(struct mfc_vhba *vhba)
+{
+	struct mfc_dev *mfc_dev = vhba->mfc_port->mfc_dev;
+	struct mfc_qp *qp = &vhba->fcmd.fc_qp;
+	enum mlx4_qp_state qp_state = MLX4_QP_STATE_RST;
+	int rc = 0;
+	int i;
+	u8 sched_q = 0;
+	struct mlx4_qp_context context;
+
+	if (vhba->net_type == NET_IB)
+		sched_q = 0x83 |
+		    (vhba->dest_ib_sl & 0xf) << 3 |
+		    (vhba->mfc_port->port - 1) << 6;
+	else if (vhba->net_type == NET_ETH)
+		sched_q = 0x83 |
+		    vhba->fc_vlan_prio << 3 | (vhba->mfc_port->port - 1) << 6;
+
+	context = (struct mlx4_qp_context) {
+		.flags = cpu_to_be32(QPC_SERVICE_TYPE_FCMD << 16),
+		.pd = cpu_to_be32(mfc_dev->priv_pdn),
+		.mtu_msgmax = 0x77,
+		.sq_size_stride = ilog2(vhba->num_fexch) << 3 |
+				  ilog2(FCMD_SQ_BB_SIZE >> 4) | SQ_NO_PREFETCH,
+		/* this means RQ_NUM_WQES=1, and RQ_WQE_SIZE=1 */
+		.rq_size_stride = 0,
+		.usr_page = cpu_to_be32(mfc_dev->priv_uar.index),
+		.local_qpn = cpu_to_be32(qp->mqp.qpn),
+		.pri_path.sched_queue = sched_q,
+		.pri_path.counter_index = 0xff,
+		.pri_path.ackto =  (vhba->net_type == NET_IB) ?
+			MLX4_LINK_TYPE_IB : MLX4_LINK_TYPE_ETH,
+		.pri_path.grh_mylmc =  (vhba->net_type == NET_IB) ?
+				       0 : vhba->rfci[RFCI_DATA].fc_mac_idx,
+		.params2 = cpu_to_be32((qp->wqres.buf.direct.map &
+					(PAGE_SIZE - 1)) & 0xfc0),
+		.cqn_send = cpu_to_be32(vhba->fcmd.fc_cq.mcq.cqn),
+		.cqn_recv = cpu_to_be32(vhba->fcmd.fc_cq.mcq.cqn),
+		.db_rec_addr = cpu_to_be64(qp->wqres.db.dma),
+		.srqn = 0,
+		.VE = 0,
+		.exch_base = cpu_to_be16(vhba->base_fexch_qpn),
+		.exch_size = ilog2(vhba->num_fexch),
+		.my_fc_id_idx = vhba->idx,
+		.qkey = cpu_to_be32(MLX4_FCOIB_QKEY),
+	};
+
+	if (vhba->fc_vlan_id != -1) {
+		context.pri_path.fl = 0x40;
+		context.pri_path.vlan_index = vhba->fc_vlan_idx;
+	}
+
+	rc = mlx4_qp_to_ready(mfc_dev->dev, &qp->wqres.mtt, &context, &qp->mqp,
+			      &qp_state);
+	if (rc) {
+		dev_err(mfc_dev->dma_dev,
+			"Fail to bring FCMD QP to ready rc=%d\n", rc);
+		goto out;
+	}
+
+	if (qp_state != MLX4_QP_STATE_RST)
+		qp->is_created = 1;
+
+	if (qp_state != MLX4_QP_STATE_RTS) {
+		dev_err(mfc_dev->dma_dev,
+			"Error bringing FCMD QP to RTS state\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* bring FEXCHs to ready state */
+	for (i = 0; i < vhba->num_fexch; i++) {
+		rc = mfc_init_fexch(vhba, i);
+		if (rc) {
+			dev_err(mfc_dev->dma_dev,
+				"Failed init of FEXCH %d for vhba, err=%d\n",
+				i, rc);
+			goto out;
+		}
+	}
+	qp->is_flushing = 0;
+out:
+	return rc;
+}
+
+static inline void set_ctrl_seg(struct mfc_ctrl_seg *ctrl, int size,
+				u8 seqid, u8 info, u8 ls, u32 task_retry_id)
+{
+	ctrl->size = cpu_to_be16(((size / 16) & 0x3f) | (1 << 7));
+	ctrl->flags = cpu_to_be32(MFC_BIT_TX_COMP |	/* request completion */
+				  (seqid << 24) | (info << 20) | (ls << 16));
+	ctrl->parameter = cpu_to_be32(task_retry_id);
+}
+
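+/*
+ * Reserve an FEXCH for a new SCSI command: take a slot from the bitmap,
+ * map the command's scatterlist through the exchange FMR, post the response
+ * buffer to the FEXCH RQ and ring its doorbell.  Returns the exchange index
+ * or a negative errno.
+ */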
+static inline int prepare_fexch(struct mfc_vhba *vhba, struct scsi_cmnd *scmd)
+{
+	struct mfc_exch *fexch;
+	struct mfc_dev *mfc_dev = vhba->mfc_port->mfc_dev;
+	int fexch_idx;
+	int rc = 0;
+	int index;
+
+	fexch_idx = mfc_bitmap_slot_alloc(&vhba->fexch_bm, 0);
+	if (fexch_idx == -1) {
+		dev_err(mfc_dev->dma_dev, "No free FEXCH\n");
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	fexch = &vhba->fexch[fexch_idx];
+
+	if (fexch->state == FEXCH_ABORT)
+		dev_err(mfc_dev->dma_dev,
+			"ERROR: Trying to send new FCMD on aborting FEXCH\n");
+
+	fexch->state = FEXCH_OK;
+	fexch->tx_completed = 0;
+	rc = mfc_map_fmr(mfc_dev, scmd, fexch);
+	if (rc) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not map SCSI sg to MFR exch no %d, err=%d, cmd"
+			" bufflen=%d, num_sg=%d, fmr_pagesize=%d, pages=%d\n",
+			fexch_idx, rc, scsi_bufflen(scmd),
+			scsi_sg_count(scmd), (1 << fexch->fmr.page_shift),
+			fexch->fmr.max_pages);
+		mfc_bitmap_slot_free(&vhba->fexch_bm, fexch_idx);
+		goto err_out;
+	}
+
+	index = mfc_post_rx_buf(mfc_dev, &fexch->fc_qp, fexch->response_buf,
+				MFC_CMD_RX_SKB_BUFSIZE);
+	if (index < 0) {
+		mfc_bitmap_slot_free(&vhba->fexch_bm, fexch_idx);
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	scmd->SCp.ptr = (char *)fexch;
+	fexch->scmd = scmd;
+
+	mfc_ring_db_rx(&fexch->fc_qp);
+
+	return fexch_idx;
+err_out:
+	return rc;
+}
+
+static inline void set_init_seg(struct mfc_init_seg *init, int frame_size,
+				u32 remote_fid,
+				enum dma_data_direction data_dir, int fexch_idx)
+{
+	init->pe = 0;		/* priority enable, goes to F_CTL[17] */
+	init->cs_ctl = 0;	/* CS_CTL/Priority field */
+	init->seq_id_tx = 0;	/* seq. id to be used in FCP_DATA frames */
+	init->mtu = cpu_to_be16(frame_size / 4);
+	init->remote_fid[2] = (remote_fid) & 0xff;
+	init->remote_fid[1] = (remote_fid >> 8) & 0xff;
+	init->remote_fid[0] = (remote_fid >> 16) & 0xff;
+
+	init->flags = (1 << 1) |
+		      (scsi_dir_translate(data_dir) << 3) | (0x0 << 6);
+
+	/* initiators never know remote exch no. at beginning of exch */
+	init->remote_exch = cpu_to_be16(0xffff);
+	/* alloc free exchange, put index here */
+	init->local_exch_idx = cpu_to_be16(fexch_idx);
+}
+
+static inline void set_eth_dgram_seg(struct mfc_eth_addr_seg *addr, u8 * dmac)
+{
+	addr->static_rate = 0;
+	memcpy(&addr->dmac, dmac, ETH_ALEN);
+}
+
+static inline void set_ib_dgram_seg(struct mfc_datagram_seg *dgram,
+				    int dest_lid, int dest_sl,
+				    unsigned long dest_qpn)
+{
+	dgram->mlid_grh = 0;	/* no GRH */
+	dgram->rlid = cpu_to_be16(dest_lid);	/* remote LID */
+	dgram->stat_rate = 0;	/* no rate limit */
+	dgram->sl_tclass_flabel = cpu_to_be32(dest_sl << 28);
+	dgram->dqpn = cpu_to_be32(dest_qpn);
+}
+
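+/*
+ * SCSI queuecommand entry point.  After the usual rport/lport checks, an
+ * FEXCH is prepared for the command and a single FCMD send WQE (control +
+ * address + init + data segments) carrying the FCP_CMND IU is built and
+ * handed to the HW by flipping the ownership bit and ringing the doorbell.
+ */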
+int mfc_queuecommand(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *))
+{
+	struct fc_lport *lp;
+	struct mfc_vhba *vhba;
+	struct mfc_dev *mfc_dev;
+	struct mlx4_dev *mdev;
+	struct mfc_queue *sq;
+	int fexch_idx;
+	struct fc_rport *rport;
+	struct mfc_data_seg *data = NULL;
+	struct mfc_ctrl_seg *ctrl = NULL;
+	struct mfc_init_seg *init = NULL;
+	dma_addr_t dma;
+	struct fcp_cmnd *cdb_cmd;
+	u32 index, prod;
+	__be32 op_own;
+	unsigned long flags;
+	int rc;
+
+	lp = shost_priv(scmd->device->host);
+	vhba = lport_priv(lp);
+
+	rport = starget_to_rport(scsi_target(scmd->device));
+	rc = fc_remote_port_chkready(rport);
+	if (rc) {
+		scmd->result = rc;
+		done(scmd);
+		return 0;
+	}
+
+	if (vhba->fcmd.fc_qp.is_flushing) {
+		scmd->result = DID_BUS_BUSY << 16;
+		done(scmd);
+		return 0;
+	}
+
+	if (!*(struct fc_remote_port **)rport->dd_data) {
+		/*
+		 * rport is transitioning from blocked/deleted to
+		 * online
+		 */
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"rport %x in transitioning to online\n",
+			rport->port_id);
+		scmd->result = DID_IMM_RETRY << 16;
+		done(scmd);
+		return 0;
+	}
+
+	if ((lp->state != LPORT_ST_READY) || lp->qfull || !lp->link_up) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"lport state=%d qfull=%d link_up=%d\n",
+			lp->state, lp->qfull, lp->link_up);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+
+	scmd->scsi_done = done;
+	scmd->result = DID_OK << 16;
+
+	mfc_dev = vhba->mfc_port->mfc_dev;
+	mdev = mfc_dev->dev;
+	sq = &vhba->fcmd.fc_qp.sq;
+
+	if (spin_trylock_irqsave(&cq_poll, flags)) {
+		mfc_cq_clean(&vhba->fcmd.fc_cq);
+		spin_unlock_irqrestore(&cq_poll, flags);
+	}
+
+	/* Check available SQ BBs + 1 spare SQ BB for ownership */
+	spin_lock_irqsave(&sq->lock, flags);
+	if (unlikely((u32) (sq->prod - sq->cons - 1) > sq->size - 2)) {
+		spin_unlock_irqrestore(&sq->lock, flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+	spin_unlock_irqrestore(&sq->lock, flags);
+
+	/* allocate and prepare FEXCH for command */
+	fexch_idx = prepare_fexch(vhba, scmd);
+	if (fexch_idx < 0)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
+	spin_lock_irqsave(&sq->lock, flags);
+	prod = sq->prod;
+	++sq->prod;
+	spin_unlock_irqrestore(&sq->lock, flags);
+
+	index = prod & sq->size_mask;
+	cdb_cmd = mfc_q_info_get(sq, index, struct fcp_cmnd *);
+
+	vhba->fexch[fexch_idx].fcmd_wqe_idx = index;
+	vhba->fexch[fexch_idx].mtu = rport->maxframe_size / 4;
+
+	if (vhba->net_type == NET_IB) {
+		struct mfcoib_cmd_tx_desc *tx_desc;
+
+		tx_desc = sq->buf + index * FCMD_SQ_BB_SIZE;
+		ctrl = &tx_desc->ctrl;
+		init = &tx_desc->init;
+		data = &tx_desc->data;
+		set_ctrl_seg(ctrl, sizeof(struct mfcoib_cmd_tx_desc),
+			     0, 6, 0, 0);
+		set_ib_dgram_seg(&tx_desc->addr, vhba->dest_ib_lid,
+				 vhba->dest_ib_sl, vhba->dest_ib_data_qpn);
+	} else if (vhba->net_type == NET_ETH) {
+		struct mfcoe_cmd_tx_desc *tx_desc;
+
+		tx_desc = sq->buf + index * FCMD_SQ_BB_SIZE;
+		ctrl = &tx_desc->ctrl;
+		init = &tx_desc->init;
+		data = &tx_desc->data;
+		set_ctrl_seg(ctrl, sizeof(struct mfcoe_cmd_tx_desc),
+			     0, 6, 0, 0);
+		set_eth_dgram_seg(&tx_desc->addr, vhba->dest_addr);
+	}
+
+	set_init_seg(init, rport->maxframe_size, rport->port_id,
+		     scmd->sc_data_direction, fexch_idx);
+
+	/* prepare cdb command in buffer */
+	if (scmd->sc_data_direction == DMA_FROM_DEVICE)
+		cdb_cmd->fc_flags = FCP_CFL_RDDATA;
+	else if (scmd->sc_data_direction == DMA_TO_DEVICE)
+		cdb_cmd->fc_flags = FCP_CFL_WRDATA;
+	else
+		cdb_cmd->fc_flags = 0;
+
+	cdb_cmd->fc_dl = htonl(scsi_bufflen(scmd));
+	cdb_cmd->fc_flags &= ~FCP_CFL_LEN_MASK;
+	int_to_scsilun(scmd->device->lun, (struct scsi_lun *)cdb_cmd->fc_lun);
+
+	memcpy(cdb_cmd->fc_cdb, scmd->cmnd, scmd->cmd_len);
+
+	/* set data segment */
+	dma = pci_map_single(mfc_dev->dev->pdev, cdb_cmd, sizeof(*cdb_cmd),
+			     PCI_DMA_TODEVICE);
+	if (pci_dma_mapping_error(mfc_dev->dev->pdev, dma))
+		return SCSI_MLQUEUE_HOST_BUSY;
+
+	data->addr = cpu_to_be64(dma);
+	data->count = cpu_to_be32(sizeof(*cdb_cmd));
+	data->mem_type = cpu_to_be32(mfc_dev->mr.key);	/* always snoop */
+
+	op_own = cpu_to_be32(MFC_CMD_OP_SEND) |
+	    ((prod & sq->size) ? cpu_to_be32(MFC_BIT_DESC_OWN) : 0);
+
+	/*
+	 * Ensure new descriptor (and ownership of next descriptor) hits memory
+	 * before setting ownership of this descriptor to HW
+	 */
+	wmb();
+	ctrl->op_own = op_own;
+
+	/* Ring doorbell! */
+	wmb();
+	writel(vhba->fcmd.fc_qp.doorbell_qpn,
+	       mfc_dev->uar_map + MLX4_SEND_DOORBELL);
+
+	return 0;
+}
diff --git a/drivers/scsi/mlx4_fc/mfc_rfci.c b/drivers/scsi/mlx4_fc/mfc_rfci.c
new file mode 100644
index 0000000..111ceb4
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/mfc_rfci.c
@@ -0,0 +1,1001 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cq.h>
+
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+#include "mfc.h"
+
+#define MLX4_CQE_QPN_MASK 0x00ffffff
+
+u8 fc_fid_flogi[] = { 0xff, 0xff, 0xfe };
+
+static void mfc_rx_rfci(struct work_struct *work);
+
+static int mfc_prepare_rx_buf(struct mfc_vhba *vhba, struct mfc_rfci *rfci)
+{
+	struct mfc_queue *rq = &rfci->fc_qp.rq;
+	struct sk_buff *skb;
+	struct mfc_rfci_rx_info *fr;
+	int index, rc = 0;
+
+	skb = dev_alloc_skb(MFC_RFCI_RX_SKB_BUFSIZE +
+			    sizeof(struct mfc_rfci_rx_info));
+	if (!skb) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"No skb - rx packet dropped\n");
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	skb_reserve(skb, sizeof(struct mfc_rfci_rx_info));
+
+	fr = (struct mfc_rfci_rx_info *)skb->head;
+	fr->vhba = vhba;
+	fr->skb = skb;
+
+	index = mfc_post_rx_buf(vhba->mfc_port->mfc_dev, &rfci->fc_qp,
+				skb->data, MFC_RFCI_RX_SKB_BUFSIZE);
+	if (index < 0) {
+		rc = index;
+		goto err_out;
+	}
+
+	mfc_q_info_get(rq, index, struct sk_buff *) = skb;
+
+err_out:
+	return rc;
+}
+
+static void mfc_rfci_unpost_rx_bufs(struct mfc_dev *mfc_dev,
+				    struct mfc_queue *rq)
+{
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	for (i = 0; i < rq->size; i++) {
+		struct sk_buff *skb;
+
+		skb = mfc_q_info_get(rq, i, struct sk_buff *);
+		if (!skb)
+			continue;
+
+		mfc_q_info_get(rq, i, struct sk_buff *) = NULL;
+
+		kfree_skb(skb);
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static struct mfc_rfci *rfci_by_qpn(struct mfc_vhba *vhba, int qpn)
+{
+	int i;
+
+	for (i = 0; (i < RFCI_NUM) && (vhba->rfci[i].fc_qp.mqp.qpn != qpn); i++)
+		;
+	return &vhba->rfci[i];
+}
+
+static void mfc_rfci_tx_comp(struct mfc_vhba *vhba, struct mlx4_cqe *cqe)
+{
+	struct mfc_rfci *rfci;
+	struct mfc_queue *sq;
+	struct sk_buff *skb;
+	u32 index;
+	unsigned long flags;
+	u64 dma = 0;
+	u32 count = 0;
+
+	rfci = rfci_by_qpn(vhba,
+			   be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
+
+	sq = &rfci->fc_qp.sq;
+	index = be16_to_cpu(cqe->wqe_index) & sq->size_mask;
+
+	if (vhba->net_type == NET_IB) {
+		struct mfcoib_rfci_tx_desc *tx_desc;
+
+		tx_desc = sq->buf + index * RFCI_SQ_BB_SIZE;
+		dma = be64_to_cpu(tx_desc->data.addr);
+		count = be32_to_cpu(tx_desc->data.count);
+	} else if (vhba->net_type == NET_ETH) {
+		struct mfcoe_rfci_tx_desc *tx_desc;
+
+		tx_desc = sq->buf + index * RFCI_SQ_BB_SIZE;
+		dma = be64_to_cpu(tx_desc->data.addr);
+		count = be32_to_cpu(tx_desc->data.count);
+	}
+
+	pci_unmap_single(vhba->mfc_port->mfc_dev->dev->pdev,
+			 dma, count, PCI_DMA_TODEVICE);
+
+	skb = mfc_q_info_get(sq, index, struct sk_buff *);
+	mfc_q_info_get(sq, index, struct sk_buff *) = NULL;
+	kfree_skb(skb);
+
+	spin_lock_irqsave(&sq->lock, flags);
+	++sq->cons;
+	spin_unlock_irqrestore(&sq->lock, flags);
+}
+
+static void mfc_rfci_rx_comp(struct mfc_vhba *vhba, struct mlx4_cqe *cqe)
+{
+	struct mfc_rfci *rfci;
+	struct mfc_queue *rq;
+	struct mfc_rx_desc *rx_desc;
+	u32 index;
+	int len;
+	unsigned long flags;
+	struct sk_buff *skb;
+	struct mfc_rfci_rx_info *fr;
+	int err;
+
+	rfci = rfci_by_qpn(vhba,
+			   be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
+
+	rq = &rfci->fc_qp.rq;
+	index = be16_to_cpu(cqe->wqe_index) & rq->size_mask;
+	rx_desc = rq->buf + (index * rq->stride);
+	pci_unmap_single(vhba->mfc_port->mfc_dev->dev->pdev,
+			 be64_to_cpu(rx_desc->data[0].addr),
+			 be32_to_cpu(rx_desc->data[0].count),
+			 PCI_DMA_FROMDEVICE);
+
+	spin_lock_irqsave(&rq->lock, flags);
+	rfci->fc_qp.rq.cons++;
+	spin_unlock_irqrestore(&rq->lock, flags);
+
+	skb = mfc_q_info_get(rq, index, struct sk_buff *);
+	if (!skb) {
+		if ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == 0x1e)
+			dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+				 "skb, RFCI Error completion, rfci qpn 0x%x\n",
+				 rfci->fc_qp.mqp.qpn);
+		goto out;
+	}
+
+	mfc_q_info_get(rq, index, struct sk_buff *) = NULL;
+
+	if (vhba->lp->state == LPORT_ST_RESET ||
+	    vhba->lp->state == LPORT_ST_DISABLED || rfci->fc_qp.is_flushing)
+		goto out;
+
+	if (!vhba->rfci_rx_enabled) {
+		dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+			 "RFCI RX ignored till host started, rx_enabled = %d\n",
+			 vhba->rfci_rx_enabled);
+
+		err = mfc_prepare_rx_buf(vhba, rfci);
+		if (err) {
+			dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+				"No mem - rx packet dropped\n");
+			goto free_skb;
+		}
+
+		mfc_ring_db_rx(&rfci->fc_qp);
+		goto free_skb;
+	}
+
+	len = be32_to_cpu(cqe->byte_cnt);
+	fr = (struct mfc_rfci_rx_info *)skb->head;
+
+	skb_put(skb, len);
+	skb_set_mac_header(skb, 0);
+
+	if (vhba->net_type == NET_IB)
+		skb_pull(skb, 0x2a);	/* 40 byte GRH, 2 byte reserved */
+	else if (vhba->net_type == NET_ETH)
+		skb_pull(skb, ETH_HLEN);
+
+	INIT_WORK(&fr->work, mfc_rx_rfci);
+	queue_work(vhba->mfc_port->rfci_wq, &fr->work);
+
+	err = mfc_prepare_rx_buf(vhba, rfci);
+	if (err)
+		goto free_skb;
+
+	mfc_ring_db_rx(&rfci->fc_qp);
+
+	goto out;
+
+free_skb:
+	if (skb)
+		kfree_skb(skb);
+out:
+	return;
+}
+
+int mfc_create_rfci(struct mfc_vhba *vhba, struct mfc_rfci *rfci, u64 mac)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_qp *qp = &rfci->fc_qp;
+	struct mfc_queue *sq = &qp->sq;
+	struct mfc_queue *rq = &qp->rq;
+	u32 qpn = 0;
+	int err = 0;
+	int i;
+
+	if (vhba->net_type == NET_ETH) {
+		dev_info(mfc_dev->dma_dev, "create RFCI for mac 0x%llx\n", mac);
+
+		err = mlx4_register_mac(mfc_dev->dev, fc_port->port, mac,
+					&rfci->fc_mac_idx);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"Could not register mac 0x%llx\n", mac);
+			goto err_out;
+		}
+	}
+
+	err = mfc_q_init(sq, RFCI_SQ_BB_SIZE, mfc_num_reserved_xids,
+			 sizeof(struct sk_buff *));
+	if (err) {
+		dev_err(mfc_dev->dma_dev, "Error initializing rfci sq\n");
+		goto err_unreg_mac;
+	}
+
+	err = mfc_q_init(rq, RFCI_RQ_WQE_SIZE, mfc_num_reserved_xids,
+			 sizeof(struct sk_buff *));
+	if (err) {
+		dev_err(mfc_dev->dma_dev, "Error initializing rfci rq\n");
+		err = -ENOMEM;
+		goto err_free_txinfo;
+	}
+
+	qp->buf_size = (sq->size * sq->stride) + (rq->size * rq->stride);
+
+	err = mlx4_alloc_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size,
+				 qp->buf_size);
+	if (err)
+		goto err_free_rxinfo;
+
+	if (RFCI_SQ_BB_SIZE >= RFCI_RQ_WQE_SIZE) {
+		sq->buf = qp->wqres.buf.direct.buf;
+		rq->buf = sq->buf + (sq->size * sq->stride);
+	} else {
+		rq->buf = qp->wqres.buf.direct.buf;
+		sq->buf = rq->buf + (rq->size * rq->stride);
+	}
+
+	*qp->wqres.db.db = 0;
+
+	mfc_stamp_q(sq);
+	mfc_stamp_q(rq);
+
+	if (vhba->net_type == NET_IB)
+		qpn = fc_port->base_rfci_qpn + vhba->idx;
+	else if (vhba->net_type == NET_ETH) {
+		qpn = fc_port->base_rfci_qpn |
+		    (rfci->fc_mac_idx << (fc_port->n_v + fc_port->n_p));
+		if (vhba->fc_vlan_id != -1 && fc_port->n_v)
+			qpn |= (vhba->fc_vlan_idx << fc_port->n_p);
+	}
+
+	err = mlx4_qp_alloc(mfc_dev->dev, qpn, &rfci->fc_qp.mqp);
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Could not allocate QP number 0x%x\n", qpn);
+		goto err_free_man;
+	}
+
+	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+	err = mfc_create_cq(vhba, &rfci->fc_cq, 2 * mfc_num_reserved_xids,
+			    0, 1, mfc_rfci_rx_comp, mfc_rfci_tx_comp, "RFCI");
+	if (err) {
+		dev_err(mfc_dev->dma_dev,
+			"Failed creating RFCI CQ for port %d, err=%d\n",
+			fc_port->port, err);
+		goto err_free_qp;
+	}
+
+	for (i = 0; i < rq->size - 1; i++) {
+		err = mfc_prepare_rx_buf(vhba, rfci);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"Failed preparing RFCI RX desc[%d]\n", i);
+			goto err_free_cq;
+		}
+	}
+
+	mfc_ring_db_rx(&rfci->fc_qp);
+
+	return 0;
+
+err_free_cq:
+	mfc_rfci_unpost_rx_bufs(mfc_dev, &rfci->fc_qp.rq);
+	mfc_destroy_cq(&rfci->fc_cq);
+err_free_qp:
+	mlx4_qp_remove(mfc_dev->dev, &qp->mqp);
+	mlx4_qp_free(mfc_dev->dev, &qp->mqp);
+err_free_man:
+	mlx4_free_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size);
+err_free_rxinfo:
+	mfc_q_destroy(rq);
+err_free_txinfo:
+	mfc_q_destroy(sq);
+err_unreg_mac:
+	/* TODO: IB case */
+	if (vhba->net_type == NET_ETH)
+		mlx4_unregister_mac(mfc_dev->dev, fc_port->port,
+				    rfci->fc_mac_idx);
+err_out:
+	return err;
+}
+
+int mfc_destroy_rfci(struct mfc_vhba *vhba, struct mfc_rfci *rfci)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_qp *qp = &rfci->fc_qp;
+	struct mfc_queue *sq = &qp->sq;
+	struct mfc_queue *rq = &qp->rq;
+	int err;
+
+	if (qp->is_created) {
+		err = flush_qp(mfc_dev, qp, 1, 1, &rfci->fc_cq, NULL);
+		if (err) {
+			dev_err(mfc_dev->dma_dev,
+				"Error flushing RFCI qpn=0x%x err=%d\n",
+				qp->mqp.qpn, err);
+			return err;
+		}
+	}
+
+	mfc_destroy_cq(&rfci->fc_cq);
+	if (qp->is_created)
+		mlx4_qp_to_reset(mfc_dev->dev, &qp->mqp);
+	qp->is_created = 0;
+	mlx4_qp_remove(mfc_dev->dev, &qp->mqp);
+	mlx4_qp_free(mfc_dev->dev, &qp->mqp);
+	mlx4_free_hwq_res(mfc_dev->dev, &qp->wqres, qp->buf_size);
+
+	mfc_rfci_unpost_rx_bufs(mfc_dev, &rfci->fc_qp.rq);
+
+	mfc_q_destroy(rq);
+	mfc_q_destroy(sq);
+	/* TODO: IB case */
+	if (vhba->net_type == NET_ETH) {
+		mlx4_unregister_mac(mfc_dev->dev, fc_port->port,
+				    rfci->fc_mac_idx);
+		rfci->fc_mac_idx = -1;
+	}
+
+	return 0;
+}
+
+int mfc_init_rfci(struct mfc_vhba *vhba, struct mfc_rfci *rfci)
+{
+	struct mfc_port *fc_port = vhba->mfc_port;
+	struct mfc_dev *mfc_dev = fc_port->mfc_dev;
+	struct mfc_qp *qp = &rfci->fc_qp;
+	enum mlx4_qp_state qp_state = MLX4_QP_STATE_RST;
+	int err = 0;
+	u8 sched_q = 0;
+	struct mlx4_qp_context context;
+
+	if (vhba->net_type == NET_IB)
+		sched_q = 0x83 |
+		    (vhba->dest_ib_sl & 0xf) << 2 | (fc_port->port - 1) << 6;
+	else if (vhba->net_type == NET_ETH)
+		sched_q = 0x83 |
+		    (vhba->fc_vlan_prio & 0xf) << 2 | (fc_port->port - 1) << 6;
+
+	context = (struct mlx4_qp_context) {
+		.flags = cpu_to_be32(QPC_SERVICE_TYPE_RFCI << 16),
+		.pd = cpu_to_be32(mfc_dev->priv_pdn),
+		/* Raw-ETH requirement */
+		.mtu_msgmax = 0x77,
+		.sq_size_stride = ilog2(mfc_num_reserved_xids) << 3 |
+				  ilog2(RFCI_SQ_BB_SIZE >> 4),
+		.rq_size_stride = ilog2(mfc_num_reserved_xids) << 3 |
+				  ilog2(RFCI_RQ_WQE_SIZE >> 4),
+		.usr_page = cpu_to_be32(mfc_dev->priv_uar.index),
+		.local_qpn = cpu_to_be32(qp->mqp.qpn),
+		.pri_path.sched_queue = sched_q,
+		.pri_path.counter_index = 0xff,
+		.pri_path.ackto = (vhba->net_type == NET_IB) ?
+				  MLX4_LINK_TYPE_IB : MLX4_LINK_TYPE_ETH,
+		.params2 = cpu_to_be32((qp->wqres.buf.direct.map &
+					(PAGE_SIZE - 1)) & 0xfc0),
+		.cqn_send = cpu_to_be32(rfci->fc_cq.mcq.cqn),
+		.cqn_recv = cpu_to_be32(rfci->fc_cq.mcq.cqn),
+		/* we can assume that db.dma is aligned */
+		.db_rec_addr = cpu_to_be64(qp->wqres.db.dma),
+		.srqn = 0,
+		.qkey = cpu_to_be32(MLX4_FCOIB_QKEY),
+	};
+
+	err = mlx4_qp_to_ready(mfc_dev->dev, &qp->wqres.mtt, &context,
+			       &qp->mqp, &qp_state);
+
+	if (qp_state != MLX4_QP_STATE_RST)
+		qp->is_created = 1;
+
+	if (qp_state != MLX4_QP_STATE_RTS) {
+		dev_err(mfc_dev->dma_dev,
+			"Error bringing RFCI QP to RTS state\n");
+		return err;
+	}
+	return 0;
+}
+
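+/*
+ * Transmit a raw FC frame (FIP/ELS/control traffic) on the RFCI send queue.
+ * The descriptor layout differs per transport: FCoIB adds a datagram segment
+ * addressed to the gateway QP, FCoE relies on the registered MAC and
+ * optional VLAN insertion; both set MFC_BIT_TX_FCRC_CS so the HW generates
+ * the FC-CRC.
+ */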
+int mlx4_do_rfci_xmit(struct mfc_vhba *vhba, int channel,
+		      struct sk_buff *skb, u8 fceof)
+{
+	struct mfc_rfci *rfci = &vhba->rfci[RFCI_CTRL];
+	struct mfc_dev *mfc_dev = vhba->mfc_port->mfc_dev;
+	struct mfc_queue *sq = &rfci->fc_qp.sq;
+	struct mfc_ctrl_seg *ctrl = NULL;
+	struct mfc_data_seg *data = NULL;
+	struct mfc_datagram_seg *dgram;
+	int desc_size;
+	dma_addr_t dma;
+	u32 index, prod;
+	__be32 op_own;
+	unsigned long flags;
+	int offset = 0;
+	struct mfcoib_rfci_tx_desc *tx_desc_ib;
+	struct mfcoe_rfci_tx_desc *tx_desc_eth;
+	u_int tlen = 0;
+
+	spin_lock_irqsave(&sq->lock, flags);
+	if (unlikely((u32) (sq->prod - sq->cons - 1) > sq->size - 2)) {
+		dev_err(mfc_dev->dma_dev, "rfci_xmit: Out of send queue BBs\n");
+		spin_unlock_irqrestore(&sq->lock, flags);
+		return -ENOMEM;
+	}
+
+	prod = sq->prod;
+	++sq->prod;
+	spin_unlock_irqrestore(&sq->lock, flags);
+
+	index = prod & sq->size_mask;
+	mfc_q_info_get(sq, index, struct sk_buff *) = skb;
+
+	if (vhba->net_type == NET_IB) {
+		desc_size = sizeof(struct mfc_ctrl_seg) +
+		    sizeof(struct mfc_data_seg) +
+		    sizeof(struct mfc_datagram_seg);
+
+		tx_desc_ib = sq->buf + index * RFCI_SQ_BB_SIZE;
+		ctrl = &tx_desc_ib->ctrl;
+		ctrl->size = cpu_to_be16((desc_size / 16) & 0x3f);
+		ctrl->flags = cpu_to_be32(MFC_BIT_TX_COMP | MFC_BIT_TX_FCRC_CS);
+
+		dgram = &tx_desc_ib->dgram;
+		dgram->fl_portn_pd = cpu_to_be32((vhba->mfc_port->port << 24) |
+						 mfc_dev->priv_pdn);
+		dgram->mlid_grh = 0;	/* no GRH */
+		dgram->rlid = cpu_to_be16(vhba->dest_ib_lid);	/* remote LID */
+		dgram->mgid_idx = 0;
+		dgram->stat_rate = 0;	/* no rate limit */
+		dgram->sl_tclass_flabel = cpu_to_be32(0 << 28 /* SL */);
+		dgram->dqpn = cpu_to_be32((channel == RFCI_CTRL) ?
+					  vhba->dest_ib_ctrl_qpn :
+					  vhba->dest_ib_data_qpn);
+		dgram->qkey = cpu_to_be32(MLX4_FCOIB_QKEY);
+
+		data = &tx_desc_ib->data;
+		/* skip the MAC addresses in the skb, but keep the ethertype */
+		offset = sizeof(struct ethhdr) - 2;
+	} else if (vhba->net_type == NET_ETH) {
+		desc_size = sizeof(struct mfc_ctrl_seg) +
+		    sizeof(struct mfc_data_seg);
+
+		tx_desc_eth = sq->buf + index * RFCI_SQ_BB_SIZE;
+		ctrl = &tx_desc_eth->ctrl;
+		ctrl->size = cpu_to_be16((desc_size / 16) & 0x3f);
+		if (vhba->fc_vlan_id != -1) {
+			tx_desc_eth->ctrl.size |= cpu_to_be16(MFC_BIT_INS_VLAN);
+			tx_desc_eth->ctrl.vlan =
+			    cpu_to_be16(vhba->fc_vlan_id |
+					vhba->fc_vlan_prio << 13);
+		}
+
+		ctrl->flags = cpu_to_be32(MFC_BIT_TX_COMP |
+					  MFC_BIT_NO_ICRC | MFC_BIT_TX_FCRC_CS);
+		data = &tx_desc_eth->data;
+		offset = 0;
+	}
+
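+	/*
+	 * The ownership bit alternates on every pass over the ring
+	 * (prod & sq->size), letting the HW tell new BBs from stale ones.
+	 */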
+	op_own = cpu_to_be32(MFC_RFCI_OP_SEND) |
+	    cpu_to_be32((u32) fceof << 16) |
+	    ((prod & sq->size) ? cpu_to_be32(MFC_BIT_DESC_OWN) : 0);
+	if (!mfc_t11_mode)
+		tlen = sizeof(struct fcoe_crc_eof_old);
+	else
+		tlen = sizeof(struct fcoe_crc_eof);
+
+	dma = pci_map_single(mfc_dev->dev->pdev, skb->data + offset,
+			     skb->len - tlen - offset, PCI_DMA_TODEVICE);
+	if (pci_dma_mapping_error(mfc_dev->dev->pdev, dma))
+		return -EINVAL;
+
+	data->addr = cpu_to_be64(dma);
+	data->count = cpu_to_be32(skb->len - tlen - offset);
+	data->mem_type = cpu_to_be32(mfc_dev->mr.key);	/* always snoop */
+
+	/* Ensure the new descriptor (and ownership of the next descriptor)
+	 * hits memory before handing ownership of this descriptor to HW */
+	wmb();
+	ctrl->op_own = op_own;
+
+	/* Ring doorbell! */
+	wmb();
+	writel(rfci->fc_qp.doorbell_qpn, mfc_dev->uar_map + MLX4_SEND_DOORBELL);
+
+	return 0;
+}
+
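+/*
+ * Create and initialize the data-path RFCI QP using the supplied MAC
+ * address.
+ */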
+int mfc_start_rfci_data(struct mfc_vhba *vhba, u64 mac)
+{
+	int err = 0;
+
+	/*
+	 * Remove any previously-set unicast MAC filter.
+	 * Add secondary FCoE MAC address filter for our OUI.
+	 */
+
+	err = mfc_create_rfci(vhba, &vhba->rfci[RFCI_DATA], mac);
+	if (err) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"Could not create data RFCI QP, err=%d\n", err);
+		goto out;
+	}
+
+	err = mfc_init_rfci(vhba, &vhba->rfci[RFCI_DATA]);
+	if (err) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"Could not init data RFCI QP, err=%d\n", err);
+		goto out;
+	}
+
+	vhba->rfci[RFCI_DATA].fc_qp.is_flushing = 0;
+out:
+	return err;
+}
+
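+/*
+ * Snoop ELS traffic while FLOGI is in progress: on an LS_ACC matching our
+ * FLOGI OX_ID, record the assigned FC_ID (and, for non-FIP Ethernet, the
+ * gateway MAC and FCoE data MAC); on an incoming FLOGI request, remember
+ * the peer MAC.
+ */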
+void mfc_recv_flogi(struct fc_lport *lp, struct fc_frame *fp, u8 sa[6])
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+	struct fc_frame_header *fh;
+	u8 op;
+
+	op = fc_frame_payload_op(fp);
+	fh = fc_frame_header_get(fp);
+	if (fh->fh_type != FC_TYPE_ELS)
+		return;
+
+	if (op == ELS_LS_ACC && fh->fh_r_ctl == FC_RCTL_ELS_REP &&
+	    vhba->flogi_oxid == ntohs(fh->fh_ox_id)) {
+		/* keep my FID */
+		memcpy(vhba->my_npid.fid, fh->fh_d_id, 3);
+
+		/* If non-FIP, learn dest addr from incoming LS_ACC */
+		if (vhba->net_type == NET_ETH) {
+			memcpy(vhba->dest_addr, sa, ETH_ALEN);
+			fc_fcoe_set_mac(vhba->rfci[RFCI_DATA].mac, fh->fh_d_id);
+		}
+
+		/* TODO: check the return code of mfc_flogi_finished() */
+		mfc_flogi_finished(lp);
+		vhba->flogi_progress = 0;
+
+	} else if (op == ELS_FLOGI && fh->fh_r_ctl == FC_RCTL_ELS_REQ && sa)
+		/* TODO: support for p2p */
+		memcpy(vhba->dest_addr, sa, ETH_ALEN);
+}
+
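+/*
+ * Exported hook for the FCoIB discovery side: wrap a FLOGI reply received
+ * over FIP in an skb/fc_frame, record the gateway data QPN and hand the
+ * frame to libfc.
+ */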
+int fcoib_recvd_flogi_reply(u64 gw_fc_handle, u8 *flogi_reply,
+			    int size, u32 gw_data_qpn)
+{
+	struct fc_frame *fp;
+	struct mfc_vhba *vhba = (struct mfc_vhba *)gw_fc_handle;
+	struct fc_lport *lp = vhba->lp;
+	struct sk_buff *skb;
+	struct mfc_rfci_rx_info *fr;
+
+	skb = dev_alloc_skb(MFC_RFCI_RX_SKB_BUFSIZE +
+			    sizeof(struct mfc_rfci_rx_info));
+	if (!skb) {
+		dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+			"No skb - rx packet dropped\n");
+		return -ENOMEM;
+	}
+
+	skb_reserve(skb, sizeof(struct mfc_rfci_rx_info));
+
+	fr = (struct mfc_rfci_rx_info *)skb->head;
+	fr->vhba = vhba;
+	fr->skb = skb;
+
+	memcpy(skb_put(skb, size), flogi_reply, size);
+
+	fp = (struct fc_frame *)skb;
+	vhba->dest_ib_data_qpn = gw_data_qpn;
+
+	fc_frame_init(fp);
+	fr_eof(fp) = FC_EOF_T;
+	fr_sof(fp) = FC_SOF_I3;
+	fr_dev(fp) = lp;
+
+	if (unlikely(vhba->flogi_progress))
+		mfc_recv_flogi(lp, fp, NULL);
+
+	fc_exch_recv(lp, fp);
+
+	return 0;
+}
+EXPORT_SYMBOL(fcoib_recvd_flogi_reply);
+
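+/*
+ * Handle a BA_ACC/BA_RJT reply to an ABTS sent on a hardware FEXCH: map
+ * the OX_ID back to an exchange, mark it aborted and wake the waiter.
+ * Returns non-zero when the frame belongs to a libfc-reserved exchange
+ * (or is otherwise not ours) and should be passed up instead.
+ */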
+static int mfc_recv_abort_reply(struct fc_frame *fp, struct mfc_vhba *vhba)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	struct mfc_exch *fexch;
+	int xno;
+	struct fc_ba_rjt *rjt;
+	struct fc_ba_acc *acc;
+
+	xno = ntohs(fh->fh_ox_id) - vhba->base_fexch_qpn +
+	    vhba->mfc_port->base_fexch_qpn;
+
+	fexch = &vhba->fexch[xno];
+
+	switch (fh->fh_r_ctl) {
+	case FC_RCTL_BA_RJT:
+		rjt = fc_frame_payload_get(fp, sizeof(*rjt));
+
+		if (xno > vhba->base_reserved_xid &&
+		    xno < vhba->base_reserved_xid + vhba->num_reserved_xid) {
+			dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+				"This fexch reserved, pass to upper layer\n");
+			return -1;
+		}
+
+		if (xno < 0 || xno >= vhba->num_fexch) {
+			dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+				"BA_RJT with invalid fexch number %d\n", xno);
+			return -1;
+		}
+
+		dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+			 "BA_RJT fexch 0x%x reason 0x%x exp 0x%x\n",
+			 xno, rjt->br_reason, rjt->br_explan);
+
+		if (fexch->state == FEXCH_SEND_ABORT)
+			fexch->state = FEXCH_ABORT;
+		break;
+
+	case FC_RCTL_BA_ACC:
+
+		acc = fc_frame_payload_get(fp, sizeof(*acc));
+
+		xno = ntohs(acc->ba_ox_id) - vhba->base_fexch_qpn +
+		    vhba->mfc_port->base_fexch_qpn;
+
+		if (xno > vhba->base_reserved_xid &&
+		    xno < vhba->base_reserved_xid + vhba->num_reserved_xid) {
+			dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+				"This fexch reserved, pass to upper layer\n");
+			return -1;
+		}
+
+		if (xno < 0 || xno >= vhba->num_fexch) {
+			dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+				"BA_ACC with invalid fexch number %d.\n", xno);
+			return -1;
+		}
+
+		if (fexch->state == FEXCH_SEND_ABORT)
+			fexch->state = FEXCH_ABORT;
+
+		dev_info(vhba->mfc_port->mfc_dev->dma_dev,
+			 "BA_ACC for 0x%x fexch\n", xno);
+
+		break;
+
+	default:
+		return -1;
+	}
+
+	complete(&fexch->tm_done);
+
+	return 0;
+}
+
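+/*
+ * Deferred RFCI receive work: strip the FCoE header (T11 or pre-T11),
+ * sanity-check the frame length, handle ABTS replies and snoop FLOGI
+ * responses, then feed the frame to fc_exch_recv().
+ */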
+static void mfc_rx_rfci(struct work_struct *work)
+{
+	struct mfc_rfci_rx_info *fr =
+	    container_of(work, struct mfc_rfci_rx_info, work);
+	u_int32_t fr_len;
+	u_int hlen;
+	u_int tlen;
+	struct mfc_vhba *vhba = fr->vhba;
+	struct fc_lport *lp = vhba->lp;
+	struct fcoe_dev_stats *stats = per_cpu_ptr(lp->dev_stats, get_cpu());
+	struct fc_frame_header *fh;
+	struct sk_buff *skb = fr->skb;
+	struct fcoe_crc_eof_old *cp;
+	enum fc_sof sof;
+	struct fc_frame *fp;
+	u8 mac[6] = { 0 };
+	struct fcoe_hdr_old *fchp;
+	u_int len;
+	struct fcoe_hdr *hp;
+	int rc;
+
+	/*
+	 * Save source MAC address before discarding header.
+	 */
+	if (unlikely(vhba->flogi_progress))
+		memcpy(mac, eth_hdr(skb)->h_source, ETH_ALEN);
+
+	/*
+	 * Check the header and pull it off.
+	 */
+	hlen = vhba->fcoe_hlen;
+	if (!mfc_t11_mode) {	/* pre-T11 */
+		fchp = (struct fcoe_hdr_old *)skb->data;
+		tlen = sizeof(struct fcoe_crc_eof_old);
+		len = ntohs(fchp->fcoe_plen);
+		fr_len = FCOE_DECAPS_LEN(len);
+		fr_len = fr_len * FCOE_WORD_TO_BYTE;
+		fr_len -= sizeof(cp->fcoe_crc32);
+		skb_pull(skb, sizeof(*fchp));
+		sof = FCOE_DECAPS_SOF(len);
+		if (unlikely(fr_len + tlen > skb->len)) {
+			if (stats->ErrorFrames < 5)
+				dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+					"len error fr_len 0x%x skb->len 0x%x\n",
+					fr_len + tlen, skb->len);
+			stats->ErrorFrames++;
+			goto free_packet;
+		}
+	} else {		/* T11 */
+		hp = (struct fcoe_hdr *)skb->data;
+		skb_pull(skb, sizeof(struct fcoe_hdr));
+		tlen = sizeof(struct fcoe_crc_eof);
+		fr_len = skb->len - tlen;
+		sof = hp->fcoe_sof;
+	}
+
+	if (unlikely(fr_len < sizeof(struct fc_frame_header))) {
+		if (stats->ErrorFrames < 5)
+			dev_err(vhba->mfc_port->mfc_dev->dma_dev,
+				"length error: len_sof %x\n", fr_len);
+		stats->ErrorFrames++;
+		goto free_packet;
+	}
+
+	if (skb_is_nonlinear(skb))
+		skb_linearize(skb);	/* not ideal */
+
+	stats->RxFrames++;
+	stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
+
+	fp = (struct fc_frame *)skb;
+	fc_frame_init(fp);
+	cp = (struct fcoe_crc_eof_old *)(skb->data + fr_len);
+	fr_eof(fp) = cp->fcoe_eof;
+	fr_sof(fp) = sof;
+	fr_dev(fp) = lp;
+
+	fh = fc_frame_header_get(fp);
+
+	if (fh->fh_r_ctl == FC_RCTL_BA_ACC || fh->fh_r_ctl == FC_RCTL_BA_RJT) {
+		rc = mfc_recv_abort_reply(fp, vhba);
+		if (rc)
+			goto libfc_packet;
+		else
+			goto free_packet;
+	}
+
+	if (unlikely(vhba->flogi_progress))
+		mfc_recv_flogi(lp, fp, mac);
+
+libfc_packet:
+	fc_exch_recv(lp, fp);
+
+	/*
+	 * no need for kfree_skb() - skb was already freed inside
+	 * fc_exch_recv()
+	 */
+	return;
+
+free_packet:
+	kfree_skb(skb);
+}
+
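+/*
+ * libfc frame-send hook.  FLOGI and LOGO frames are diverted to FIP
+ * (Ethernet) or to the FCoIB discovery callback (IB); everything else
+ * gets an FCoE/FCoIB encapsulation header and trailer and is posted on
+ * the control or data RFCI via mlx4_do_rfci_xmit().
+ */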
+int mfc_frame_send(struct fc_lport *lp, struct fc_frame *fp)
+{
+	struct mfc_vhba *vhba = lport_priv(lp);
+	struct fc_frame_header *fh;
+	struct sk_buff *skb;
+	u8 sof, eof;
+	unsigned int elen;
+	unsigned int hlen;
+	unsigned int tlen;
+	int wlen;
+	struct ethhdr *eh;
+	struct fcoe_crc_eof *cp;
+	int flogi_in_progress = 0;
+	struct fcoe_hdr *hp;
+	struct fcoe_hdr_old *ohp;
+	int data_channel;
+	int rc = 0;
+
+	fh = fc_frame_header_get(fp);
+
+	skb = fp_skb(fp);
+
+	if (unlikely(fh->fh_r_ctl == FC_RCTL_ELS_REQ)) {
+		if (fc_frame_payload_op(fp) == ELS_FLOGI) {
+			vhba->flogi_oxid = ntohs(fh->fh_ox_id);
+			vhba->flogi_progress = 1;
+			flogi_in_progress = 1;
+			vhba->rfci_rx_enabled = 1;
+			if (mfc_debug_mode == 0)
+				fc_fcoe_set_mac(vhba->dest_addr, fc_fid_flogi);
+
+			if (vhba->net_type == NET_ETH && vhba->link_up) {
+				if (fcoe_ctlr_els_send(&vhba->ctlr, lp, skb)) {
+					dev_err(vhba->mfc_port->mfc_dev->
+						dma_dev,
+						"Sending FLOGI over FIP\n");
+					goto out;
+				}
+			} else if (vhba->net_type == NET_IB) {
+				vhba->fcoib_send_els_cb(
+					vhba->gw_discovery_handle, (u64) vhba,
+					FLOGI_OVER_FIP, skb->data,
+					vhba->rfci[RFCI_CTRL].fc_qp.mqp.qpn);
+				goto out_skb_free;
+			}
+		} else if (fc_frame_payload_op(fp) == ELS_LOGO &&
+			   !memcmp(fc_fid_flogi, fh->fh_d_id, 3)) {
+
+			if (vhba->net_type == NET_ETH) {
+				if (fcoe_ctlr_els_send(&vhba->ctlr, lp, skb)) {
+					dev_info(vhba->mfc_port->mfc_dev->
+						 dma_dev,
+						 "Sending FLOGO over FIP\n");
+					goto out;
+				}
+			} else if (vhba->net_type == NET_IB) {
+				vhba->fcoib_send_els_cb(
+					vhba->gw_discovery_handle, (u64) vhba,
+					LOGO_OVER_FIP, skb->data,
+					vhba->rfci[RFCI_CTRL].fc_qp.mqp.qpn);
+				goto out_skb_free;
+			}
+		}
+	}
+
+	if (vhba->rfci[RFCI_CTRL].fc_qp.is_flushing) {
+		rc = -1;
+		goto out_skb_free;
+	}
+
+	if (flogi_in_progress || (mfc_debug_mode == 1))
+		data_channel = RFCI_CTRL;
+	else
+		data_channel = RFCI_DATA;
+
+	sof = fr_sof(fp);
+	eof = fr_eof(fp);
+
+	if (!mfc_t11_mode) {
+		hlen = sizeof(struct fcoe_hdr_old);
+		tlen = sizeof(struct fcoe_crc_eof_old);
+	} else {
+		hlen = sizeof(struct fcoe_hdr);
+		tlen = sizeof(struct fcoe_crc_eof);
+	}
+
+	elen = sizeof(struct ethhdr);
+
+	cp = (struct fcoe_crc_eof *)skb_put(skb, tlen);
+	memset(cp, 0, sizeof(*cp));
+
+	wlen = (skb->len - tlen + sizeof(u32)) / FCOE_WORD_TO_BYTE;
+
+	/* adjust skb network/transport offsets to match mac/fcoe/fc */
+	skb_push(skb, elen + hlen);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb->mac_len = elen;
+
+	eh = eth_hdr(skb);
+
+	if (vhba->net_type == NET_ETH) {
+		skb->protocol = htons(ETH_P_FCOE);
+		eh->h_proto = htons(ETH_P_FCOE);
+
+		if (vhba->ctlr.map_dest)
+			fc_fcoe_set_mac(eh->h_dest, fh->fh_d_id);
+		else
+			/* insert GW address */
+			memcpy(eh->h_dest, vhba->ctlr.dest_addr, ETH_ALEN);
+
+		if (unlikely(vhba->ctlr.flogi_oxid != FC_XID_UNKNOWN))
+			memcpy(eh->h_source, vhba->ctlr.ctl_src_addr, ETH_ALEN);
+		else
+			memcpy(eh->h_source, vhba->rfci[RFCI_DATA].mac,
+			       ETH_ALEN);
+	} else if (vhba->net_type == NET_IB) {
+		skb->protocol = htons(FCOIB_SIG);
+		eh->h_proto = htons(FCOIB_SIG);
+	}
+
+	if (!mfc_t11_mode) {
+		ohp = (struct fcoe_hdr_old *)(eh + 1);
+		ohp->fcoe_plen = htons(FCOE_ENCAPS_LEN_SOF(wlen, sof));
+	} else {
+		hp = (struct fcoe_hdr *)(eh + 1);
+		memset(hp, 0, sizeof(*hp));
+		if (FC_FCOE_VER)
+			FC_FCOE_ENCAPS_VER(hp, FC_FCOE_VER);
+		hp->fcoe_sof = sof;
+	}
+
+	fr_dev(fp) = lp;
+
+	rc = mlx4_do_rfci_xmit(vhba, data_channel, skb, eof);
+	if (!rc)
+		goto out;
+
+out_skb_free:
+	kfree_skb(skb);
+
+out:
+	return rc;
+}
diff --git a/drivers/scsi/mlx4_fc/mfc_sysfs.c b/drivers/scsi/mlx4_fc/mfc_sysfs.c
new file mode 100644
index 0000000..61511f2
--- /dev/null
+++ b/drivers/scsi/mlx4_fc/mfc_sysfs.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <scsi/libfc.h>
+#include "mfc.h"
+
+char *vhba_dentry_name(char *buf, struct mfc_vhba *vhba, char *str)
+{
+	snprintf(buf, VHBA_SYSFS_LEN, "%s%d_%s", "vhba",
+		 vhba->lp->host->host_no, str);
+	return buf;
+}
+
+char *fport_dentry_name(char *buf, struct mfc_port *fport, char *str)
+{
+	snprintf(buf, VHBA_SYSFS_LEN, "mlx4_%d_port%d_%s",
+		 fport->mfc_dev->idx, fport->port, str);
+	return buf;
+}
+
+#define _sprintf(p, buf, format, arg...)				\
+	(((PAGE_SIZE - (int)(p - buf)) <= 0) ?				\
+	 0 : scnprintf(p, PAGE_SIZE - (int)(p - buf), format, ## arg))
+
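+/*
+ * vHBA and port "dentries" are sysfs attribute files hung off the owning
+ * module's kobject; DENTRY_CREATE/DENTRY_REMOVE wrap
+ * sysfs_create_file()/sysfs_remove_file().
+ */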
+#define DENTRY_REMOVE(_dentry)						\
+do {									\
+	sysfs_remove_file((_dentry)->kobj, &(_dentry)->mattr.attr);	\
+} while (0)
+
+#define DENTRY_CREATE(_ctx, _dentry, _name, _show, _store)		\
+do {									\
+	struct mfc_sysfs_attr *vdentry = _dentry;			\
+	struct module *owner = THIS_MODULE;				\
+	vdentry->ctx = _ctx;						\
+	vdentry->mattr.show = _show;					\
+	vdentry->mattr.store = _store;					\
+	vdentry->mattr.attr.name = vdentry->name;			\
+	vdentry->mattr.attr.mode = 0;					\
+	vdentry->kobj = &owner->mkobj.kobj;				\
+	snprintf(vdentry->name, VHBA_SYSFS_LEN, "%s", _name);		\
+	if (vdentry->mattr.store)					\
+		vdentry->mattr.attr.mode |= S_IWUGO;			\
+	if (vdentry->mattr.show)					\
+		vdentry->mattr.attr.mode |= S_IRUGO;			\
+	if (sysfs_create_file(vdentry->kobj, &vdentry->mattr.attr)) {	\
+		printk(KERN_WARNING "failed to create %s\n",		\
+			vdentry->mattr.attr.name);			\
+		vdentry->ctx = NULL;					\
+		break;							\
+	}								\
+} while (0)
+
+static inline struct net_device *vhba_get_netdev(struct mfc_vhba *vhba)
+{
+	return (struct net_device *)vhba->underdev;
+}
+
+static inline const char *fc_lport_state_name(enum fc_lport_state lp_state)
+{
+	static const char *fc_lport_state_names[] = {
+		[LPORT_ST_DISABLED] = "Disabled",
+		[LPORT_ST_FLOGI] = "FLOGI",
+		[LPORT_ST_DNS] = "dNS",
+		[LPORT_ST_RSPN_ID] = "RSPN_ID",
+		[LPORT_ST_RFT_ID] = "RFT_ID",
+		[LPORT_ST_SCR] = "SCR",
+		[LPORT_ST_READY] = "Ready",
+		[LPORT_ST_LOGO] = "LOGO",
+		[LPORT_ST_RESET] = "reset",
+	};
+
+	if (lp_state > LPORT_ST_RESET)
+		return "invalid_state";
+
+	return fc_lport_state_names[lp_state];
+}
+
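+/*
+ * "info" attribute of a vHBA: protocol, gateway addressing, QP/CQ numbers
+ * and libfc lport state.
+ */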
+static ssize_t vhba_show(struct module_attribute *attr,
+			 struct module *mod, char *buf)
+{
+	char *p = buf;
+	struct mfc_sysfs_attr *vhba_dentry =
+	    container_of(attr, struct mfc_sysfs_attr, mattr);
+	struct mfc_vhba *vhba = vhba_dentry->ctx;
+	struct net_device *netdev;
+
+	switch (vhba->net_type) {
+	case NET_ETH:
+		/* FCOE VHBA */
+		netdev = vhba_get_netdev(vhba);
+
+		p += _sprintf(p, buf, "PROTO                    FCoE\n");
+		p += _sprintf(p, buf, "ETH_IF                   %s\n",
+			      netdev->name);
+		p += _sprintf(p, buf, "GW_MAC                   "
+			      MAC_PRINTF_FMT "\n",
+			      MAC_PRINTF_VAR(vhba->dest_addr));
+		p += _sprintf(p, buf, "VLAN_ID                  %d\n",
+			      vhba->fc_vlan_id);
+		p += _sprintf(p, buf, "VLAN_HW_TABLE_IDX        %d\n",
+			      vhba->fc_vlan_idx);
+		p += _sprintf(p, buf, "VLAN_PRIO                %d\n",
+			      vhba->fc_vlan_prio);
+		break;
+	case NET_IB:
+		/* FCOIB VHBA */
+		p += _sprintf(p, buf, "PROTO                    FCoIB\n");
+		p += _sprintf(p, buf, "GW_CTRL_QPN              0x%lx\n",
+			      vhba->dest_ib_ctrl_qpn);
+		p += _sprintf(p, buf, "GW_DATA_QPN              0x%lx\n",
+			      vhba->dest_ib_data_qpn);
+		p += _sprintf(p, buf, "GW_LID                   0x%x\n",
+			      vhba->dest_ib_lid);
+		break;
+	}
+	/* VHBA GENERAL */
+	p += _sprintf(p, buf, "PORT_NUM                 %d\n",
+		      vhba->mfc_port->port);
+	p += _sprintf(p, buf, "SYSFS_PORT_NAME          mlx4_%d_port%d\n",
+		      vhba->mfc_port->mfc_dev->idx, vhba->mfc_port->port);
+	p += _sprintf(p, buf, "FC_PAYLOAD               %d\n",
+		      vhba->fc_payload_size);
+	p += _sprintf(p, buf, "BASE_FEXCH_MPT           0x%x\n",
+		      vhba->base_fexch_mpt);
+	p += _sprintf(p, buf, "BASE_LIBFC_FEXCH         0x%x\n",
+		      vhba->base_reserved_xid);
+	p += _sprintf(p, buf, "NUM_LIBFC_FEXCH          %d\n",
+		      vhba->num_reserved_xid);
+	p += _sprintf(p, buf, "BASE_FEXCH_QPN           0x%x\n",
+		      vhba->base_fexch_qpn);
+	p += _sprintf(p, buf, "NUM_FEXCH                %d\n", vhba->num_fexch);
+	p += _sprintf(p, buf, "LPORT_STATE              %s\n",
+		      fc_lport_state_name(vhba->lp->state));
+
+	/* RFCI CTRL */
+	p += _sprintf(p, buf, "RFCI_CTRL_QPN            0x%x\n",
+		      vhba->rfci[RFCI_CTRL].fc_qp.mqp.qpn);
+	p += _sprintf(p, buf, "RFCI_CTRL_CQN            0x%x\n",
+		      vhba->rfci[RFCI_CTRL].fc_cq.mcq.cqn);
+
+	if (vhba->net_type == NET_ETH) {
+		p += _sprintf(p, buf,
+			      "RFCI_CTRL_MAC            " MAC_PRINTF_FMT "\n",
+			      MAC_PRINTF_VAR(vhba->rfci[RFCI_CTRL].mac));
+	}
+
+	/* RFCI DATA for fcoe only */
+	if (vhba->net_type == NET_ETH) {
+		p += _sprintf(p, buf, "RFCI_DATA_QPN            0x%x\n",
+			      vhba->rfci[RFCI_DATA].fc_qp.mqp.qpn);
+		p += _sprintf(p, buf, "RFCI_DATA_CQN            0x%x\n",
+			      vhba->rfci[RFCI_DATA].fc_cq.mcq.cqn);
+		p += _sprintf(p, buf,
+			      "RFCI_DATA_MAC            " MAC_PRINTF_FMT "\n",
+			      MAC_PRINTF_VAR(vhba->rfci[RFCI_DATA].mac));
+	}
+
+	return (ssize_t) (p - buf);
+}
+
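+/*
+ * "info" attribute of a physical port: HCA identity plus the FEXCH and
+ * RFCI QP ranges assigned to the port.
+ */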
+static ssize_t fport_show(struct module_attribute *attr,
+			  struct module *mod, char *buf)
+{
+	char *p = buf;
+	struct mfc_sysfs_attr *fport_dentry =
+	    container_of(attr, struct mfc_sysfs_attr, mattr);
+	struct mfc_port *fport = fport_dentry->ctx;
+	p += _sprintf(p, buf, "HCA_BOARD_ID             %.*s\n",
+		      MLX4_BOARD_ID_LEN, fport->mfc_dev->dev->board_id);
+	p += _sprintf(p, buf, "PCI_DEV                  %s\n",
+		      pci_name(fport->mfc_dev->dev->pdev));
+	p += _sprintf(p, buf, "BASE_FEXCH_MPT           0x%x\n",
+		      fport->base_fexch_mpt);
+	p += _sprintf(p, buf, "BASE_FEXCH_QPN           0x%x\n",
+		      fport->base_fexch_qpn);
+	p += _sprintf(p, buf, "BASE_RFCI_QPN            0x%x\n",
+		      fport->base_rfci_qpn);
+	p += _sprintf(p, buf, "NUM_FEXCH_QPS            %d\n",
+		      fport->num_fexch_qps);
+
+	return (ssize_t) (p - buf);
+}
+
+int mfc_vhba_create_dentry(struct mfc_vhba *vhba)
+{
+	char name[VHBA_SYSFS_LEN];
+
+	DENTRY_CREATE(vhba, &vhba->dentry, vhba_dentry_name(name, vhba, "info"),
+		      vhba_show, NULL);
+
+	return 0;
+}
+
+void mfc_vhba_delete_dentry(struct mfc_vhba *vhba)
+{
+	if (vhba->dentry.ctx)
+		DENTRY_REMOVE(&vhba->dentry);
+}
+
+int mfc_port_create_dentry(struct mfc_port *fport)
+{
+	char name[VHBA_SYSFS_LEN];
+
+	DENTRY_CREATE(fport, &fport->dentry,
+		      fport_dentry_name(name, fport, "info"), fport_show, NULL);
+
+	return 0;
+}
+
+void mfc_port_delete_dentry(struct mfc_port *fport)
+{
+	if (fport->dentry.ctx)
+		DENTRY_REMOVE(&fport->dentry);
+}
-- 
1.6.3.3

