[PATCH rdma-next] IB/core: Add an unbound WQ type to the new CQ API

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jack Morgenstein <jackm@xxxxxxxxxxxxxxxxxx>

The upstream kernel commit cited below modified the workqueue in the
new CQ API to be bound to a specific CPU (instead of being unbound).
This caused ALL users of the new CQ API to use the same bound WQ.

Specifically, MAD handling was severely delayed when the CPU bound
to the WQ was busy handling (higher priority) interrupts.

This caused a delay in the MAD "heartbeat" response handling,
which resulted in ports being incorrectly classified as "down".

To fix this, add a new "unbound" WQ type to the new CQ API, so that users
have the option to choose either a bound WQ or an unbound WQ.

For MADs, choose the new "unbound" WQ.

Fixes: b7363e67b23e ("IB/device: Convert ib-comp-wq to be CPU-bound")
Signed-off-by: Jack Morgenstein <jackm@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
---
 drivers/infiniband/core/cq.c     | 23 +++++++++++++++++++++++
 drivers/infiniband/core/device.c | 15 ++++++++++++++-
 drivers/infiniband/core/mad.c    |  2 +-
 include/rdma/ib_verbs.h          |  8 +++++---
 4 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index af5ad6a56ae4..5fefe4f7586b 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -120,6 +120,23 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 	queue_work(ib_comp_wq, &cq->work);
 }
 
+static void ib_cq_poll_unbound_work(struct work_struct *work)
+{
+	struct ib_cq *cq = container_of(work, struct ib_cq, work);
+	int completed;
+
+	completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
+				    IB_POLL_BATCH);
+	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
+	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+		queue_work(ib_comp_unbound_wq, &cq->work);
+}
+
+static void ib_cq_completion_unbound_workqueue(struct ib_cq *cq, void *private)
+{
+	queue_work(ib_comp_unbound_wq, &cq->work);
+}
+
 /**
  * __ib_alloc_cq - allocate a completion queue
  * @dev:		device to allocate the CQ for
@@ -179,6 +196,11 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
 		INIT_WORK(&cq->work, ib_cq_poll_work);
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 		break;
+	case IB_POLL_UNBOUND_WORKQUEUE:
+		cq->comp_handler = ib_cq_completion_unbound_workqueue;
+		INIT_WORK(&cq->work, ib_cq_poll_unbound_work);
+		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+		break;
 	default:
 		ret = -EINVAL;
 		goto out_free_wc;
@@ -213,6 +235,7 @@ void ib_free_cq(struct ib_cq *cq)
 		irq_poll_disable(&cq->iop);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cancel_work_sync(&cq->work);
 		break;
 	default:
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b8144f194777..f9f28632177b 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -61,6 +61,7 @@ struct ib_client_data {
 };
 
 struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
@@ -1168,10 +1169,19 @@ static int __init ib_core_init(void)
 		goto err;
 	}
 
+	ib_comp_unbound_wq =
+		alloc_workqueue("ib-comp-unb-wq",
+				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
+				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_comp_unbound_wq) {
+		ret = -ENOMEM;
+		goto err_comp;
+	}
+
 	ret = class_register(&ib_class);
 	if (ret) {
 		pr_warn("Couldn't create InfiniBand device class\n");
-		goto err_comp;
+		goto err_comp_unbound;
 	}
 
 	ret = rdma_nl_init();
@@ -1220,6 +1230,8 @@ static int __init ib_core_init(void)
 	rdma_nl_exit();
 err_sysfs:
 	class_unregister(&ib_class);
+err_comp_unbound:
+	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
 err:
@@ -1239,6 +1251,7 @@ static void __exit ib_core_cleanup(void)
 	rdma_nl_exit();
 	class_unregister(&ib_class);
 	destroy_workqueue(ib_comp_wq);
+	destroy_workqueue(ib_comp_unbound_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
 }
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef459f2f2eeb..b8977c3db5f3 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3183,7 +3183,7 @@ static int ib_mad_port_open(struct ib_device *device,
 		cq_size *= 2;
 
 	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
-			IB_POLL_WORKQUEUE);
+			IB_POLL_UNBOUND_WORKQUEUE);
 	if (IS_ERR(port_priv->cq)) {
 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
 		ret = PTR_ERR(port_priv->cq);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 73c780c99482..45679c99f5b2 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -71,6 +71,7 @@
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
 
 union ib_gid {
 	u8	raw[16];
@@ -1569,9 +1570,10 @@ struct ib_ah {
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
 enum ib_poll_context {
-	IB_POLL_DIRECT,		/* caller context, no hw completions */
-	IB_POLL_SOFTIRQ,	/* poll from softirq context */
-	IB_POLL_WORKQUEUE,	/* poll from workqueue */
+	IB_POLL_DIRECT,		   /* caller context, no hw completions */
+	IB_POLL_SOFTIRQ,	   /* poll from softirq context */
+	IB_POLL_WORKQUEUE,	   /* poll from workqueue */
+	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
 };
 
 struct ib_cq {
-- 
2.14.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux