[PATCH rdma-next 6/6] IB/core: Fix addr_req list delete corruption due to re-enqueue

Leon Romanovsky <leon@xxxxxxxxxx> · Sun, 25 Mar 2018 13:40:24 +0300

From: Parav Pandit <parav@xxxxxxxxxxxx>

mod_delayed_work() may enqueue the work item even if it is getting
executed while it is getting rescheduled via mod_delayed_work().

When process_one_req() is about to get executed, rdma_cancel_addr()
modifies the delay to 0.

This results into enqueueing the freed addr_req and work item again.
Below trace shows that one request is getting executed twice results
into list del corruption kernel crash.

Therefore, let the request gets completed whoever sees it first;
if process_one_req() sees that rdma_addr_cancel() already progressed
to cancel the request, it ignore the work item processing.

Cancel request waits for any existing work item to finish execution or
cancel any past execution. This ensures that cancel routine doesn't
free the request that workqueue might schedule.

list_del corruption, 0000000009223578->prev is
LIST_POISON2 (000000000f85575b)
WARNING: CPU: 3 PID: 340 at lib/list_debug.c:50
__list_del_entry_valid+0x9a/0xa0
CPU: 3 PID: 340 Comm: kworker/u16:4 Not tainted 4.15.0-rc2-bfix+ #26
Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
Workqueue: ib_addr process_one_req [ib_core]
task: 00000000e0ed2fa9 task.stack: 00000000e1ddfff5
RIP: 0010:__list_del_entry_valid+0x9a/0xa0
RSP: 0018:ffffc9000044fde0 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff880f8b82c200 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffff88107fccd078 RDI: ffff88107fccd078
RBP: ffffc9000044fde0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffff880f8b82c200
R13: ffff880f8b82c210 R14: ffff880f8b82c340 R15: ffff880f8b82c338
FS:  0000000000000000(0000) GS:ffff88107fcc0000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffec6001228 CR3: 0000000001c10004 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
__list_del_entry+0xd/0x30 [ib_core]
process_one_req+0x9f/0x140 [ib_core]
process_one_work+0x1e3/0x5f0
? process_one_work+0x156/0x5f0
worker_thread+0x4d/0x3e0
kthread+0x14b/0x180
? process_one_work+0x5f0/0x5f0
? kthread_stop+0x2b0/0x2b0
ret_from_fork+0x24/0x30

Fixes: 5fff41e1f89d ("IB/core: Fix race condition in resolving IP to MAC")
Reviewed-by: Mark Bloch <markb@xxxxxxxxxxxx>
Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx>
---
 drivers/infiniband/core/addr.c | 54 ++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index b0a52c996208..5f0386e672a2 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -63,6 +63,7 @@ struct addr_req {
 	unsigned long timeout;
 	struct delayed_work work;
 	int status;
+	bool canceled;
 	u32 seq;
 };
 
@@ -561,6 +562,14 @@ static int addr_resolve(struct sockaddr *src_in,
 	return ret;
 }
 
+static void complete_addr_req(struct addr_req *req)
+{
+	req->callback(req->status, (struct sockaddr *)&req->src_addr,
+		req->addr, req->context);
+	put_client(req->client);
+	kfree(req);
+}
+
 static void process_one_req(struct work_struct *_work)
 {
 	struct addr_req *req;
@@ -569,6 +578,16 @@ static void process_one_req(struct work_struct *_work)
 	mutex_lock(&lock);
 	req = container_of(_work, struct addr_req, work.work);
 
+	/* If rdma_addr_cancel() has canceled this request, it waits using
+	 * cancel_delayed_work_sync() before finishing the request and freeing
+	 * it, so it is guaranteed that this work item request is valid when
+	 * when work is scheduled.
+	 */
+	if (req->canceled) {
+		mutex_unlock(&lock);
+		return;
+	}
+
 	if (req->status == -ENODATA) {
 		src_in = (struct sockaddr *)&req->src_addr;
 		dst_in = (struct sockaddr *)&req->dst_addr;
@@ -586,10 +605,7 @@ static void process_one_req(struct work_struct *_work)
 	list_del(&req->list);
 	mutex_unlock(&lock);
 
-	req->callback(req->status, (struct sockaddr *)&req->src_addr,
-		req->addr, req->context);
-	put_client(req->client);
-	kfree(req);
+	complete_addr_req(req);
 }
 
 static void process_req(struct work_struct *work)
@@ -610,6 +626,12 @@ static void process_req(struct work_struct *work)
 			if (req->status && time_after_eq(jiffies, req->timeout))
 				req->status = -ETIMEDOUT;
 			else if (req->status == -ENODATA) {
+				/* Requeue the work for retrying again.
+				 * It is safe to modify the request timeout
+				 * of req because when process_req() work is
+				 * executing on ordered workqueue, other work
+				 * cannot be executing in parallel.
+				 */
 				set_timeout(&req->work, req->timeout);
 				continue;
 			}
@@ -623,13 +645,10 @@ static void process_req(struct work_struct *work)
 		list_del(&req->list);
 		/* It is safe to cancel other work items from this work item
 		 * because at a time there can be only one work item running
-		 * with this single threaded work queue.
+		 * with ordered work queue.
 		 */
 		cancel_delayed_work(&req->work);
-		req->callback(req->status, (struct sockaddr *) &req->src_addr,
-			req->addr, req->context);
-		put_client(req->client);
-		kfree(req);
+		complete_addr_req(req);
 	}
 }
 
@@ -714,19 +733,28 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
-	struct addr_req *req, *temp_req;
+	struct addr_req *req;
+	bool found = false;
 
 	mutex_lock(&lock);
-	list_for_each_entry_safe(req, temp_req, &req_list, list) {
+	list_for_each_entry(req, &req_list, list) {
 		if (req->addr == addr) {
 			req->status = -ECANCELED;
+			req->canceled = true;
 			req->timeout = jiffies;
-			list_move(&req->list, &req_list);
-			set_timeout(&req->work, req->timeout);
+			list_del(&req->list);
+			found = true;
 			break;
 		}
 	}
 	mutex_unlock(&lock);
+
+	if (!found)
+		return;
+
+	/* Wait for process_one_req() to finish accessing this work */
+	cancel_delayed_work_sync(&req->work);
+	complete_addr_req(req);
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-- 
2.14.3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html