Re: Possible Race Condition on SIGKILL

On Wed, 2013-01-09 at 15:52 -0500, Trond Myklebust wrote:
> On Wed, 2013-01-09 at 12:55 -0500, Chris Perl wrote:
> > > Hrm.  I guess I'm in over my head here. Apologies if I'm just asking
> > > silly bumbling questions.  You can start ignoring me at any time. :)
> > 
> > I stared at the code for a while more and now see why what I
> > outlined is not possible.  Thanks for helping to clarify!
> > 
> > I decided to pull your git repo and compile with HEAD at
> > 87ed50036b866db2ec2ba16b2a7aec4a2b0b7c39 (linux-next as of this
> > morning).  Using this kernel, I can no longer induce any hangs.
> > 
> > Interestingly, I tried recompiling the CentOS 6.3 kernel with
> > both the original patch (v4) and the last patch you sent about fixing
> > priority queues.  With both of those in place, I still run into a
> > problem.
> > 
> > echo 0 > /proc/sys/sunrpc/rpc_debug after the hang shows (I left in the
> > previous additional prints and added printing of the task pointer
> > itself):
> > 
> > <6>client: ffff88082896c200, xprt: ffff880829011000, snd_task: ffff880829a1aac0
> > <6>client: ffff8808282b5600, xprt: ffff880829011000, snd_task: ffff880829a1aac0
> > <6>--task-- -pid- flgs status -client- --rqstp- -timeout ---ops--
> > <6>ffff88082a463180 22007 0080    -11 ffff8808282b5600   (null)        0 ffffffffa027b7a0 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
> > <6>client: ffff88082838cc00, xprt: ffff88082b7c5800, snd_task:   (null)
> > <6>client: ffff8808283db400, xprt: ffff88082b7c5800, snd_task:   (null)
> > <6>client: ffff8808283db200, xprt: ffff880829011000, snd_task: ffff880829a1aac0
> > 
> > Any thoughts about other patches that might affect this?
> 
> Hmm... The only one that springs to mind is this one (see attachment)
> and then the 'connect' fixes that you helped us with previously.

Never mind. I suspect that the main reason why RHEL-6.3 is still
vulnerable is that it lacks commit
961a828df64979d2a9faeeeee043391670a193b9 (SUNRPC: Fix potential races in
xprt_lock_write_next()).
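
For readers following the thread, here is a minimal userspace sketch of the
pattern that commit introduces. It is not the kernel code (the real patch is
attached below); the names fake_task, fake_wait_queue, fake_xprt,
wake_up_first() and lock_write_func() are made-up stand-ins, with a pthread
mutex playing the role of the waitqueue spinlock. The point is that the
callback assigns snd_task while the queue lock is still held, so the wake-up
and the ownership assignment cannot race with another task grabbing the
transport in between.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct fake_task {
	struct fake_task *next;		/* FIFO chain of queued tasks */
	int id;
};

struct fake_wait_queue {
	pthread_mutex_t lock;		/* stands in for queue->lock */
	struct fake_task *head;		/* first queued (sleeping) task */
};

struct fake_xprt {
	struct fake_task *snd_task;	/* stands in for xprt->snd_task */
};

/*
 * Analogue of rpc_wake_up_first(): find the first queued task and, while
 * still holding the queue lock, let the callback decide whether to claim
 * it.  Only if the callback returns true is the task dequeued ("woken").
 */
static struct fake_task *
wake_up_first(struct fake_wait_queue *q,
	      bool (*func)(struct fake_task *, void *), void *data)
{
	struct fake_task *task = NULL;

	pthread_mutex_lock(&q->lock);
	if (q->head != NULL) {
		task = q->head;
		if (func(task, data))
			q->head = task->next;	/* dequeue == wake up */
		else
			task = NULL;
	}
	pthread_mutex_unlock(&q->lock);
	return task;
}

/* Analogue of __xprt_lock_write_func(): hand the transport to the task. */
static bool lock_write_func(struct fake_task *task, void *data)
{
	struct fake_xprt *xprt = data;

	xprt->snd_task = task;		/* assignment happens under q->lock */
	return true;
}

int main(void)
{
	struct fake_task t = { .next = NULL, .id = 1 };
	struct fake_wait_queue q = { .head = &t };
	struct fake_xprt xprt = { .snd_task = NULL };
	struct fake_task *woken;

	pthread_mutex_init(&q.lock, NULL);
	/* Wake the first waiter and give it the transport in one locked step. */
	woken = wake_up_first(&q, lock_write_func, &xprt);
	pthread_mutex_destroy(&q.lock);

	return (woken == &t && xprt.snd_task == &t) ? 0 : 1;
}

Compare this with rpc_wake_up_first() and __xprt_lock_write_func() in the
patch below, which do the same thing under queue->lock taken with
spin_lock_bh().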

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
From 961a828df64979d2a9faeeeee043391670a193b9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
Date: Tue, 17 Jan 2012 22:57:37 -0500
Subject: [PATCH] SUNRPC: Fix potential races in xprt_lock_write_next()

We have to ensure that the wake up from the waitqueue and the assignment
of xprt->snd_task are atomic. We can do this by assigning the snd_task
while under the waitqueue spinlock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
---
 fs/nfs/nfs4_fs.h             |  1 +
 fs/nfs/nfs4proc.c            | 13 +++++++-----
 fs/nfs/nfs4state.c           | 17 ++++++++-------
 include/linux/sunrpc/sched.h |  3 +++
 net/sunrpc/sched.c           | 42 +++++++++++++++++++++++++++++--------
 net/sunrpc/xprt.c            | 49 +++++++++++++++++++++++---------------------
 6 files changed, 79 insertions(+), 46 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index df3d02c..c45c21a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -222,6 +222,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
 	return server->nfs_client->cl_session;
 }
 
+extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy);
 extern int nfs4_setup_sequence(const struct nfs_server *server,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
 		struct rpc_task *task);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 360240c..828a765 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -385,17 +385,20 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
 		free_slotid, tbl->highest_used_slotid);
 }
 
+bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy)
+{
+	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+	return true;
+}
+
 /*
  * Signal state manager thread if session fore channel is drained
  */
 static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
 {
-	struct rpc_task *task;
-
 	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
-		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
-		if (task)
-			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+		rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq,
+				nfs4_set_task_privileged, NULL);
 		return;
 	}
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a42e60d..f0e9881 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -190,23 +190,22 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 static void nfs4_end_drain_session(struct nfs_client *clp)
 {
 	struct nfs4_session *ses = clp->cl_session;
+	struct nfs4_slot_table *tbl;
 	int max_slots;
 
 	if (ses == NULL)
 		return;
+	tbl = &ses->fc_slot_table;
 	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
-		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
-		max_slots = ses->fc_slot_table.max_slots;
+		spin_lock(&tbl->slot_tbl_lock);
+		max_slots = tbl->max_slots;
 		while (max_slots--) {
-			struct rpc_task *task;
-
-			task = rpc_wake_up_next(&ses->fc_slot_table.
-						slot_tbl_waitq);
-			if (!task)
+			if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
+						nfs4_set_task_privileged,
+						NULL) == NULL)
 				break;
-			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 		}
-		spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
+		spin_unlock(&tbl->slot_tbl_lock);
 	}
 }
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index b16243a..bd337f9 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -235,6 +235,9 @@ void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 					struct rpc_task *);
 void		rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
+struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *,
+					bool (*)(struct rpc_task *, void *),
+					void *);
 void		rpc_wake_up_status(struct rpc_wait_queue *, int);
 int		rpc_queue_empty(struct rpc_wait_queue *);
 void		rpc_delay(struct rpc_task *, unsigned long);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3341d89..f982dfe 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -422,7 +422,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 /*
  * Wake up the next task on a priority queue.
  */
-static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
+static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
 {
 	struct list_head *q;
 	struct rpc_task *task;
@@ -467,30 +467,54 @@ new_queue:
 new_owner:
 	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
-	rpc_wake_up_task_queue_locked(queue, task);
 	return task;
 }
 
+static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
+{
+	if (RPC_IS_PRIORITY(queue))
+		return __rpc_find_next_queued_priority(queue);
+	if (!list_empty(&queue->tasks[0]))
+		return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
+	return NULL;
+}
+
 /*
- * Wake up the next task on the wait queue.
+ * Wake up the first task on the wait queue.
  */
-struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
+struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
+		bool (*func)(struct rpc_task *, void *), void *data)
 {
 	struct rpc_task	*task = NULL;
 
-	dprintk("RPC:       wake_up_next(%p \"%s\")\n",
+	dprintk("RPC:       wake_up_first(%p \"%s\")\n",
 			queue, rpc_qname(queue));
 	spin_lock_bh(&queue->lock);
-	if (RPC_IS_PRIORITY(queue))
-		task = __rpc_wake_up_next_priority(queue);
-	else {
-		task_for_first(task, &queue->tasks[0])
+	task = __rpc_find_next_queued(queue);
+	if (task != NULL) {
+		if (func(task, data))
 			rpc_wake_up_task_queue_locked(queue, task);
+		else
+			task = NULL;
 	}
 	spin_unlock_bh(&queue->lock);
 
 	return task;
 }
+EXPORT_SYMBOL_GPL(rpc_wake_up_first);
+
+static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
+{
+	return true;
+}
+
+/*
+ * Wake up the next task on the wait queue.
+*/
+struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
+{
+	return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
+}
 EXPORT_SYMBOL_GPL(rpc_wake_up_next);
 
 /**
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c64c0ef..839f6ef 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -292,54 +292,57 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
 	return retval;
 }
 
-static void __xprt_lock_write_next(struct rpc_xprt *xprt)
+static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
 {
-	struct rpc_task *task;
+	struct rpc_xprt *xprt = data;
 	struct rpc_rqst *req;
 
-	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
-		return;
-
-	task = rpc_wake_up_next(&xprt->sending);
-	if (task == NULL)
-		goto out_unlock;
-
 	req = task->tk_rqstp;
 	xprt->snd_task = task;
 	if (req) {
 		req->rq_bytes_sent = 0;
 		req->rq_ntrans++;
 	}
-	return;
+	return true;
+}
 
-out_unlock:
+static void __xprt_lock_write_next(struct rpc_xprt *xprt)
+{
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
+		return;
+
+	if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt))
+		return;
 	xprt_clear_locked(xprt);
 }
 
-static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
+static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
 {
-	struct rpc_task *task;
+	struct rpc_xprt *xprt = data;
 	struct rpc_rqst *req;
 
-	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
-		return;
-	if (RPCXPRT_CONGESTED(xprt))
-		goto out_unlock;
-	task = rpc_wake_up_next(&xprt->sending);
-	if (task == NULL)
-		goto out_unlock;
-
 	req = task->tk_rqstp;
 	if (req == NULL) {
 		xprt->snd_task = task;
-		return;
+		return true;
 	}
 	if (__xprt_get_cong(xprt, task)) {
 		xprt->snd_task = task;
 		req->rq_bytes_sent = 0;
 		req->rq_ntrans++;
-		return;
+		return true;
 	}
+	return false;
+}
+
+static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
+{
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
+		return;
+	if (RPCXPRT_CONGESTED(xprt))
+		goto out_unlock;
+	if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt))
+		return;
 out_unlock:
 	xprt_clear_locked(xprt);
 }
-- 
1.7.11.7

