From: Parav Pandit <parav@xxxxxxxxxxxx> mod_delayed_work() may enqueue the work item even if it is getting executed while it is getting rescheduled via mod_delayed_work(). When process_one_req() is about to get executed, rdma_addr_cancel() modifies the delay to 0. This results in enqueueing the freed addr_req and work item again. The trace below shows that one request getting executed twice results in a list_del corruption kernel crash. Therefore, let the request get completed by whoever sees it first; if process_one_req() sees that rdma_addr_cancel() has already progressed to cancel the request, it ignores the work item processing. The cancel request waits for any existing work item to finish execution or cancels any pending execution. This ensures that the cancel routine doesn't free a request that the workqueue might schedule. list_del corruption, 0000000009223578->prev is LIST_POISON2 (000000000f85575b) WARNING: CPU: 3 PID: 340 at lib/list_debug.c:50 __list_del_entry_valid+0x9a/0xa0 CPU: 3 PID: 340 Comm: kworker/u16:4 Not tainted 4.15.0-rc2-bfix+ #26 Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016 Workqueue: ib_addr process_one_req [ib_core] task: 00000000e0ed2fa9 task.stack: 00000000e1ddfff5 RIP: 0010:__list_del_entry_valid+0x9a/0xa0 RSP: 0018:ffffc9000044fde0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff880f8b82c200 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff88107fccd078 RDI: ffff88107fccd078 RBP: ffffc9000044fde0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff880f8b82c200 R13: ffff880f8b82c210 R14: ffff880f8b82c340 R15: ffff880f8b82c338 FS: 0000000000000000(0000) GS:ffff88107fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffec6001228 CR3: 0000000001c10004 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_del_entry+0xd/0x30 
[ib_core] process_one_req+0x9f/0x140 [ib_core] process_one_work+0x1e3/0x5f0 ? process_one_work+0x156/0x5f0 worker_thread+0x4d/0x3e0 kthread+0x14b/0x180 ? process_one_work+0x5f0/0x5f0 ? kthread_stop+0x2b0/0x2b0 ret_from_fork+0x24/0x30 Fixes: 5fff41e1f89d ("IB/core: Fix race condition in resolving IP to MAC") Reviewed-by: Mark Bloch <markb@xxxxxxxxxxxx> Signed-off-by: Parav Pandit <parav@xxxxxxxxxxxx> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> --- drivers/infiniband/core/addr.c | 54 ++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b0a52c996208..5f0386e672a2 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -63,6 +63,7 @@ struct addr_req { unsigned long timeout; struct delayed_work work; int status; + bool canceled; u32 seq; }; @@ -561,6 +562,14 @@ static int addr_resolve(struct sockaddr *src_in, return ret; } +static void complete_addr_req(struct addr_req *req) +{ + req->callback(req->status, (struct sockaddr *)&req->src_addr, + req->addr, req->context); + put_client(req->client); + kfree(req); +} + static void process_one_req(struct work_struct *_work) { struct addr_req *req; @@ -569,6 +578,16 @@ static void process_one_req(struct work_struct *_work) mutex_lock(&lock); req = container_of(_work, struct addr_req, work.work); + /* If rdma_addr_cancel() has canceled this request, it waits using + * cancel_delayed_work_sync() before finishing the request and freeing + * it, so it is guaranteed that this work item request is valid when + * work is scheduled. 
+ */ + if (req->canceled) { + mutex_unlock(&lock); + return; + } + if (req->status == -ENODATA) { src_in = (struct sockaddr *)&req->src_addr; dst_in = (struct sockaddr *)&req->dst_addr; @@ -586,10 +605,7 @@ static void process_one_req(struct work_struct *_work) list_del(&req->list); mutex_unlock(&lock); - req->callback(req->status, (struct sockaddr *)&req->src_addr, - req->addr, req->context); - put_client(req->client); - kfree(req); + complete_addr_req(req); } static void process_req(struct work_struct *work) @@ -610,6 +626,12 @@ static void process_req(struct work_struct *work) if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) { + /* Requeue the work for retrying again. + * It is safe to modify the request timeout + * of req because when process_req() work is + * executing on ordered workqueue, other work + * cannot be executing in parallel. + */ set_timeout(&req->work, req->timeout); continue; } @@ -623,13 +645,10 @@ static void process_req(struct work_struct *work) list_del(&req->list); /* It is safe to cancel other work items from this work item * because at a time there can be only one work item running - * with this single threaded work queue. + * with ordered work queue. 
*/ cancel_delayed_work(&req->work); - req->callback(req->status, (struct sockaddr *) &req->src_addr, - req->addr, req->context); - put_client(req->client); - kfree(req); + complete_addr_req(req); } } @@ -714,19 +733,28 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr) { - struct addr_req *req, *temp_req; + struct addr_req *req; + bool found = false; mutex_lock(&lock); - list_for_each_entry_safe(req, temp_req, &req_list, list) { + list_for_each_entry(req, &req_list, list) { if (req->addr == addr) { req->status = -ECANCELED; + req->canceled = true; req->timeout = jiffies; - list_move(&req->list, &req_list); - set_timeout(&req->work, req->timeout); + list_del(&req->list); + found = true; break; } } mutex_unlock(&lock); + + if (!found) + return; + + /* Wait for process_one_req() to finish accessing this work */ + cancel_delayed_work_sync(&req->work); + complete_addr_req(req); } EXPORT_SYMBOL(rdma_addr_cancel); -- 2.14.3 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html