[PATCH v2 3/7] IB/rdmavt: Add create queue pair functionality

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add create queue pair verbs call as well as supporting functions.

Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
Reviewed-by: Harish Chegondi <harish.chegondi@xxxxxxxxx>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
---
 drivers/infiniband/sw/rdmavt/qp.c |  425 +++++++++++++++++++++++++++++++++++--
 drivers/infiniband/sw/rdmavt/vt.c |    1 
 include/rdma/rdma_vt.h            |   10 +
 3 files changed, 413 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 17dd6ab..7d1f02e 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -47,8 +47,11 @@
 
 #include <linux/bitops.h>
 #include <linux/lockdep.h>
-#include "vt.h"
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
 #include "qp.h"
+#include "vt.h"
 
 static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
 {
@@ -151,7 +154,10 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
 	 * If driver is not doing any QP allocation then make sure it is
 	 * providing the necessary QP functions.
 	 */
-	if (!rdi->driver_f.free_all_qps)
+	if (!rdi->driver_f.free_all_qps ||
+	    !rdi->driver_f.qp_priv_alloc ||
+	    !rdi->driver_f.qp_priv_free ||
+	    !rdi->driver_f.notify_qp_reset)
 		return -EINVAL;
 
 	/* allocate parent object */
@@ -178,7 +184,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
 	if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
 		goto fail_table;
 
-	return ret;
+	spin_lock_init(&rdi->n_qps_lock);
+
+	return 0;
 
 fail_table:
 	kfree(rdi->qp_dev->qp_table);
@@ -197,31 +205,29 @@ no_qp_table:
  * There should not be any QPs still in use.
  * Free memory for table.
  */
-static unsigned free_all_qps(struct rvt_dev_info *rdi)
+static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
 {
 	unsigned long flags;
 	struct rvt_qp *qp;
 	unsigned n, qp_inuse = 0;
 	spinlock_t *ql; /* work around too long line below */
 
-	rdi->driver_f.free_all_qps(rdi);
+	if (rdi->driver_f.free_all_qps)
+		qp_inuse = rdi->driver_f.free_all_qps(rdi);
 
 	if (!rdi->qp_dev)
-		return 0;
+		return qp_inuse;
 
 	ql = &rdi->qp_dev->qpt_lock;
-	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+	spin_lock_irqsave(ql, flags);
 	for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
 		qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
 					       lockdep_is_held(ql));
 		RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
-		qp =  rcu_dereference_protected(qp->next,
-						lockdep_is_held(ql));
-		while (qp) {
+
+		for (; qp; qp = rcu_dereference_protected(qp->next,
+							  lockdep_is_held(ql)))
 			qp_inuse++;
-			qp =  rcu_dereference_protected(qp->next,
-							lockdep_is_held(ql));
-		}
 	}
 	spin_unlock_irqrestore(ql, flags);
 	synchronize_rcu();
@@ -230,26 +236,190 @@ static unsigned free_all_qps(struct rvt_dev_info *rdi)
 
 void rvt_qp_exit(struct rvt_dev_info *rdi)
 {
-	u32 qps_inuse = free_all_qps(rdi);
+	u32 qps_inuse = rvt_free_all_qps(rdi);
 
-	qps_inuse = free_all_qps(rdi);
 	if (qps_inuse)
 		rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
 			   qps_inuse);
 	if (!rdi->qp_dev)
 		return;
 
+	if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER)
+		return; /* driver did the qp init so nothing else to do */
+
 	kfree(rdi->qp_dev->qp_table);
 	free_qpn_table(&rdi->qp_dev->qpn_table);
 	kfree(rdi->qp_dev);
 }
 
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
+{
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
+}
+
+/*
+ * Allocate the next available QPN or
+ * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
+ */
+static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		     enum ib_qp_type type, u8 port)
+{
+	u32 i, offset, max_scan, qpn;
+	struct rvt_qpn_map *map;
+	u32 ret;
+
+	if (rdi->driver_f.alloc_qpn)
+		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port);
+
+	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
+		unsigned n;
+
+		ret = type == IB_QPT_GSI;
+		n = 1 << (ret + 2 * (port - 1));
+		spin_lock(&qpt->lock);
+		if (qpt->flags & n)
+			ret = -EINVAL;
+		else
+			qpt->flags |= n;
+		spin_unlock(&qpt->lock);
+		goto bail;
+	}
+
+	qpn = qpt->last + qpt->incr;
+	if (qpn >= RVT_QPN_MAX)
+		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
+	/* offset carries bit 0 */
+	offset = qpn & RVT_BITS_PER_PAGE_MASK;
+	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
+	max_scan = qpt->nmaps - !offset;
+	for (i = 0;;) {
+		if (unlikely(!map->page)) {
+			get_map_page(qpt, map);
+			if (unlikely(!map->page))
+				break;
+		}
+		do {
+			if (!test_and_set_bit(offset, map->page)) {
+				qpt->last = qpn;
+				ret = qpn;
+				goto bail;
+			}
+			offset += qpt->incr;
+			/*
+			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
+			 * That is OK.   It gets re-assigned below
+			 */
+			qpn = mk_qpn(qpt, map, offset);
+		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
+		/*
+		 * In order to keep the number of pages allocated to a
+		 * minimum, we scan the all existing pages before increasing
+		 * the size of the bitmap table.
+		 */
+		if (++i > max_scan) {
+			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
+				break;
+			map = &qpt->map[qpt->nmaps++];
+			/* start at incr with current bit 0 */
+			offset = qpt->incr | (offset & 1);
+		} else if (map < &qpt->map[qpt->nmaps]) {
+			++map;
+			/* start at incr with current bit 0 */
+			offset = qpt->incr | (offset & 1);
+		} else {
+			map = &qpt->map[0];
+			/* wrap to first map page, invert bit 0 */
+			offset = qpt->incr | ((offset & 1) ^ 1);
+		}
+		/* there can be no bits at shift and below */
+		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		qpn = mk_qpn(qpt, map, offset);
+	}
+
+	ret = -ENOMEM;
+
+bail:
+	return ret;
+}
+
+static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+	struct rvt_qpn_map *map;
+
+	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+	if (map->page)
+		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ */
+static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		     enum ib_qp_type type)
+{
+	qp->remote_qpn = 0;
+	qp->qkey = 0;
+	qp->qp_access_flags = 0;
+
+	/*
+	 * Let driver do anything it needs to for a new/reset qp
+	 */
+	rdi->driver_f.notify_qp_reset(qp);
+
+	qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
+	qp->s_hdrwords = 0;
+	qp->s_wqe = NULL;
+	qp->s_draining = 0;
+	qp->s_next_psn = 0;
+	qp->s_last_psn = 0;
+	qp->s_sending_psn = 0;
+	qp->s_sending_hpsn = 0;
+	qp->s_psn = 0;
+	qp->r_psn = 0;
+	qp->r_msn = 0;
+	if (type == IB_QPT_RC) {
+		qp->s_state = IB_OPCODE_RC_SEND_LAST;
+		qp->r_state = IB_OPCODE_RC_SEND_LAST;
+	} else {
+		qp->s_state = IB_OPCODE_UC_SEND_LAST;
+		qp->r_state = IB_OPCODE_UC_SEND_LAST;
+	}
+	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+	qp->r_nak_state = 0;
+	qp->r_aflags = 0;
+	qp->r_flags = 0;
+	qp->s_head = 0;
+	qp->s_tail = 0;
+	qp->s_cur = 0;
+	qp->s_acked = 0;
+	qp->s_last = 0;
+	qp->s_ssn = 1;
+	qp->s_lsn = 0;
+	qp->s_mig_state = IB_MIG_MIGRATED;
+	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	qp->r_head_ack_queue = 0;
+	qp->s_tail_ack_queue = 0;
+	qp->s_num_rd_atomic = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
+	qp->r_sge.num_sge = 0;
+}
+
 /**
  * rvt_create_qp - create a queue pair for a device
  * @ibpd: the protection domain who's device we create the queue pair for
  * @init_attr: the attributes of the queue pair
  * @udata: user data for libibverbs.so
  *
+ * Queue pair creation is mostly an rvt issue. However, drivers have their own
+ * unique idea of what queue pair numbers mean. For instance there is a reserved
+ * range for PSM.
+ *
  * Returns the queue pair on success, otherwise returns an errno.
  *
  * Called by the ib_create_qp() core verbs function.
@@ -258,15 +428,226 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 			    struct ib_qp_init_attr *init_attr,
 			    struct ib_udata *udata)
 {
+	struct rvt_qp *qp;
+	int err;
+	struct rvt_swqe *swq = NULL;
+	size_t sz;
+	size_t sg_list_sz;
+	struct ib_qp *ret = ERR_PTR(-ENOMEM);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+	void *priv = NULL;
+
+	if (!rdi)
+		return ERR_PTR(-EINVAL);
+
+	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
+	    init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+
+	/* Check receive queue parameters if no SRQ is specified. */
+	if (!init_attr->srq) {
+		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
+			return ERR_PTR(-EINVAL);
+
+		if (init_attr->cap.max_send_sge +
+		    init_attr->cap.max_send_wr +
+		    init_attr->cap.max_recv_sge +
+		    init_attr->cap.max_recv_wr == 0)
+			return ERR_PTR(-EINVAL);
+	}
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		if (init_attr->port_num == 0 ||
+		    init_attr->port_num > ibpd->device->phys_port_cnt)
+			return ERR_PTR(-EINVAL);
+	case IB_QPT_UC:
+	case IB_QPT_RC:
+	case IB_QPT_UD:
+		sz = sizeof(struct rvt_sge) *
+			init_attr->cap.max_send_sge +
+			sizeof(struct rvt_swqe);
+		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+		if (!swq)
+			return ERR_PTR(-ENOMEM);
+
+		sz = sizeof(*qp);
+		sg_list_sz = 0;
+		if (init_attr->srq) {
+			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
+
+			if (srq->rq.max_sge > 1)
+				sg_list_sz = sizeof(*qp->r_sg_list) *
+					(srq->rq.max_sge - 1);
+		} else if (init_attr->cap.max_recv_sge > 1)
+			sg_list_sz = sizeof(*qp->r_sg_list) *
+				(init_attr->cap.max_recv_sge - 1);
+		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+		if (!qp)
+			goto bail_swq;
+
+		RCU_INIT_POINTER(qp->next, NULL);
+
+		/*
+		 * Driver needs to set up it's private QP structure and do any
+		 * initialization that is needed.
+		 */
+		priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
+		if (!priv)
+			goto bail_qp;
+		qp->priv = priv;
+		qp->timeout_jiffies =
+			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+				1000UL);
+		if (init_attr->srq) {
+			sz = 0;
+		} else {
+			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+				sizeof(struct rvt_rwqe);
+			qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) +
+						   qp->r_rq.size * sz);
+			if (!qp->r_rq.wq)
+				goto bail_driver_priv;
+		}
+
+		/*
+		 * ib_create_qp() will initialize qp->ibqp
+		 * except for qp->ibqp.qp_num.
+		 */
+		spin_lock_init(&qp->r_lock);
+		spin_lock_init(&qp->s_lock);
+		spin_lock_init(&qp->r_rq.lock);
+		atomic_set(&qp->refcount, 0);
+		init_waitqueue_head(&qp->wait);
+		init_timer(&qp->s_timer);
+		qp->s_timer.data = (unsigned long)qp;
+		INIT_LIST_HEAD(&qp->rspwait);
+		qp->state = IB_QPS_RESET;
+		qp->s_wq = swq;
+		qp->s_size = init_attr->cap.max_send_wr + 1;
+		qp->s_max_sge = init_attr->cap.max_send_sge;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = RVT_S_SIGNAL_REQ_WR;
+
+		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
+				init_attr->qp_type,
+				init_attr->port_num);
+		if (err < 0) {
+			ret = ERR_PTR(err);
+			goto bail_rq_wq;
+		}
+		qp->ibqp.qp_num = err;
+		qp->port_num = init_attr->port_num;
+		reset_qp(rdi, qp, init_attr->qp_type);
+		break;
+
+	default:
+		/* Don't support raw QPs */
+		return ERR_PTR(-EINVAL);
+	}
+
+	init_attr->cap.max_inline_data = 0;
+
 	/*
-	 * Queue pair creation is mostly an rvt issue. However, drivers have
-	 * their own unique idea of what queue pare numbers mean. For instance
-	 * there is a reserved range for PSM.
-	 *
-	 * VI-DRIVER-API: make_qpn()
-	 * Returns a valid QPN for verbs to use
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See hfi1_mmap() for details.
 	 */
-	return ERR_PTR(-EOPNOTSUPP);
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		if (!qp->r_rq.wq) {
+			__u64 offset = 0;
+
+			err = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (err) {
+				ret = ERR_PTR(err);
+				goto bail_qpn;
+			}
+		} else {
+			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
+
+			qp->ip = rvt_create_mmap_info(rdi, s,
+						      ibpd->uobject->context,
+						      qp->r_rq.wq);
+			if (!qp->ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_qpn;
+			}
+
+			err = ib_copy_to_udata(udata, &qp->ip->offset,
+					       sizeof(qp->ip->offset));
+			if (err) {
+				ret = ERR_PTR(err);
+				goto bail_ip;
+			}
+		}
+	}
+
+	spin_lock(&rdi->n_qps_lock);
+	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
+		spin_unlock(&rdi->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	rdi->n_qps_allocated++;
+	spin_unlock(&rdi->n_qps_lock);
+
+	if (qp->ip) {
+		spin_lock_irq(&rdi->pending_lock);
+		list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
+	}
+
+	ret = &qp->ibqp;
+
+	/*
+	 * We have our QP and its good, now keep track of what types of opcodes
+	 * can be processed on this QP. We do this by keeping track of what the
+	 * 3 high order bits of the opcode are.
+	 */
+	switch (init_attr->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	case IB_QPT_RC:
+		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	case IB_QPT_UC:
+		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	default:
+		ret = ERR_PTR(-EINVAL);
+		goto bail_ip;
+	}
+
+	return ret;
+
+bail_ip:
+	kref_put(&qp->ip->ref, rvt_release_mmap_info);
+
+bail_qpn:
+	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+bail_rq_wq:
+	vfree(qp->r_rq.wq);
+
+bail_driver_priv:
+	rdi->driver_f.qp_priv_free(rdi, qp);
+
+bail_qp:
+	kfree(qp);
+
+bail_swq:
+	vfree(swq);
+
+	return ret;
 }
 
 /**
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index df2df36..e75eb3d 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -362,6 +362,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
 
 	ib_unregister_device(&rdi->ibdev);
 	rvt_mr_exit(rdi);
+	rvt_qp_exit(rdi);
 }
 EXPORT_SYMBOL(rvt_unregister_device);
 
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 3a78f20..3bdeac7 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -222,7 +222,10 @@ struct rvt_driver_provided {
 	int (*port_callback)(struct ib_device *, u8, struct kobject *);
 	const char * (*get_card_name)(struct rvt_dev_info *rdi);
 	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
-	void (*free_all_qps)(struct rvt_dev_info *rdi);
+	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
+	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+	void (*notify_qp_reset)(struct rvt_qp *qp);
 
 	/*--------------------*/
 	/* Optional functions */
@@ -230,6 +233,8 @@ struct rvt_driver_provided {
 	int (*check_ah)(struct ib_device *, struct ib_ah_attr *);
 	void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
 			      struct rvt_ah *);
+	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+			 enum ib_qp_type type, u8 port);
 };
 
 struct rvt_dev_info {
@@ -262,7 +267,10 @@ struct rvt_dev_info {
 	int flags;
 	struct rvt_ibport **ports;
 
+	/* QP */
 	struct rvt_qp_ibdev *qp_dev;
+	u32 n_qps_allocated;    /* number of QPs allocated for device */
+	spinlock_t n_qps_lock; /* keep track of number of qps */
 
 	/* memory maps */
 	struct list_head pending_mmaps;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux