On Wed, 2013-01-09 at 15:52 -0500, Trond Myklebust wrote: +AD4- On Wed, 2013-01-09 at 12:55 -0500, Chris Perl wrote: +AD4- +AD4- +AD4- Hrm. I guess I'm in over my head here. Apologoies if I'm just asking +AD4- +AD4- +AD4- silly bumbling questions. You can start ignoring me at any time. :) +AD4- +AD4- +AD4- +AD4- I stared at the code for a while and more and now see why what I +AD4- +AD4- outlined is not possible. Thanks for helping to clarify+ACE- +AD4- +AD4- +AD4- +AD4- I decided to pull your git repo and compile with HEAD at +AD4- +AD4- 87ed50036b866db2ec2ba16b2a7aec4a2b0b7c39 (linux-next as of this +AD4- +AD4- morning). Using this kernel, I can no longer induce any hangs. +AD4- +AD4- +AD4- +AD4- Interestingly, I tried recompiling the CentOS 6.3 kernel with +AD4- +AD4- both the original patch (v4) and the last patch you sent about fixing +AD4- +AD4- priority queues. With both of those in place, I still run into a +AD4- +AD4- problem. +AD4- +AD4- +AD4- +AD4- echo 0 +AD4- /proc/sys/sunrpc/rpc+AF8-debug after the hang shows (I left in the +AD4- +AD4- previous additional prints and added printing of the tasks pointer +AD4- +AD4- itself): +AD4- +AD4- +AD4- +AD4- +ADw-6+AD4-client: ffff88082896c200, xprt: ffff880829011000, snd+AF8-task: ffff880829a1aac0 +AD4- +AD4- +ADw-6+AD4-client: ffff8808282b5600, xprt: ffff880829011000, snd+AF8-task: ffff880829a1aac0 +AD4- +AD4- +ADw-6+AD4---task-- -pid- flgs status -client- --rqstp- -timeout ---ops-- +AD4- +AD4- +ADw-6+AD4-ffff88082a463180 22007 0080 -11 ffff8808282b5600 (null) 0 ffffffffa027b7a0 nfsv3 ACCESS a:call+AF8-reserveresult q:xprt+AF8-sending +AD4- +AD4- +ADw-6+AD4-client: ffff88082838cc00, xprt: ffff88082b7c5800, snd+AF8-task: (null) +AD4- +AD4- +ADw-6+AD4-client: ffff8808283db400, xprt: ffff88082b7c5800, snd+AF8-task: (null) +AD4- +AD4- +ADw-6+AD4-client: ffff8808283db200, xprt: ffff880829011000, snd+AF8-task: ffff880829a1aac0 +AD4- +AD4- +AD4- +AD4- Any thoughts about other patches that might affect this? +AD4- +AD4- Hmm... The only one that springs to mind is this one (see attachment) +AD4- and then the 'connect' fixes that you helped us with previously. Never mind. I suspect that the main reason why RHEL-6.3 is still vulnerable is that it lacks commit 961a828df64979d2a9faeeeee043391670a193b9 (SUNRPC: Fix potential races in xprt+AF8-lock+AF8-write+AF8-next()). -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust+AEA-netapp.com www.netapp.com
From 961a828df64979d2a9faeeeee043391670a193b9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> Date: Tue, 17 Jan 2012 22:57:37 -0500 Subject: [PATCH] SUNRPC: Fix potential races in xprt_lock_write_next() We have to ensure that the wake up from the waitqueue and the assignment of xprt->snd_task are atomic. We can do this by assigning the snd_task while under the waitqueue spinlock. Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 13 +++++++----- fs/nfs/nfs4state.c | 17 ++++++++------- include/linux/sunrpc/sched.h | 3 +++ net/sunrpc/sched.c | 42 +++++++++++++++++++++++++++++-------- net/sunrpc/xprt.c | 49 +++++++++++++++++++++++--------------------- 6 files changed, 79 insertions(+), 46 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index df3d02c..c45c21a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -222,6 +222,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } +extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 360240c..828a765 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -385,17 +385,20 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) free_slotid, tbl->highest_used_slotid); } +bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + return true; +} + /* * Signal state manager thread if session fore channel is drained */ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) { - struct rpc_task *task; - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); - if (task) - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, + nfs4_set_task_privileged, NULL); return; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a42e60d..f0e9881 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -190,23 +190,22 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; + struct nfs4_slot_table *tbl; int max_slots; if (ses == NULL) return; + tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - spin_lock(&ses->fc_slot_table.slot_tbl_lock); - max_slots = ses->fc_slot_table.max_slots; + spin_lock(&tbl->slot_tbl_lock); + max_slots = tbl->max_slots; while (max_slots--) { - struct rpc_task *task; - - task = rpc_wake_up_next(&ses->fc_slot_table. - slot_tbl_waitq); - if (!task) + if (rpc_wake_up_first(&tbl->slot_tbl_waitq, + nfs4_set_task_privileged, + NULL) == NULL) break; - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); } - spin_unlock(&ses->fc_slot_table.slot_tbl_lock); + spin_unlock(&tbl->slot_tbl_lock); } } diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index b16243a..bd337f9 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -235,6 +235,9 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, + bool (*)(struct rpc_task *, void *), + void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); int rpc_queue_empty(struct rpc_wait_queue *); void rpc_delay(struct rpc_task *, unsigned long); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3341d89..f982dfe 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -422,7 +422,7 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the next task on a priority queue. */ -static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) +static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue) { struct list_head *q; struct rpc_task *task; @@ -467,30 +467,54 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - rpc_wake_up_task_queue_locked(queue, task); return task; } +static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue) +{ + if (RPC_IS_PRIORITY(queue)) + return __rpc_find_next_queued_priority(queue); + if (!list_empty(&queue->tasks[0])) + return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list); + return NULL; +} + /* - * Wake up the next task on the wait queue. + * Wake up the first task on the wait queue. */ -struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) +struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue, + bool (*func)(struct rpc_task *, void *), void *data) { struct rpc_task *task = NULL; - dprintk("RPC: wake_up_next(%p \"%s\")\n", + dprintk("RPC: wake_up_first(%p \"%s\")\n", queue, rpc_qname(queue)); spin_lock_bh(&queue->lock); - if (RPC_IS_PRIORITY(queue)) - task = __rpc_wake_up_next_priority(queue); - else { - task_for_first(task, &queue->tasks[0]) + task = __rpc_find_next_queued(queue); + if (task != NULL) { + if (func(task, data)) rpc_wake_up_task_queue_locked(queue, task); + else + task = NULL; } spin_unlock_bh(&queue->lock); return task; } +EXPORT_SYMBOL_GPL(rpc_wake_up_first); + +static bool rpc_wake_up_next_func(struct rpc_task *task, void *data) +{ + return true; +} + +/* + * Wake up the next task on the wait queue. +*/ +struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue) +{ + return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL); +} EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c64c0ef..839f6ef 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -292,54 +292,57 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return retval; } -static void __xprt_lock_write_next(struct rpc_xprt *xprt) +static bool __xprt_lock_write_func(struct rpc_task *task, void *data) { - struct rpc_task *task; + struct rpc_xprt *xprt = data; struct rpc_rqst *req; - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - req = task->tk_rqstp; xprt->snd_task = task; if (req) { req->rq_bytes_sent = 0; req->rq_ntrans++; } - return; + return true; +} -out_unlock: +static void __xprt_lock_write_next(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + + if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt)) + return; xprt_clear_locked(xprt); } -static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) +static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) { - struct rpc_task *task; + struct rpc_xprt *xprt = data; struct rpc_rqst *req; - if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) - return; - if (RPCXPRT_CONGESTED(xprt)) - goto out_unlock; - task = rpc_wake_up_next(&xprt->sending); - if (task == NULL) - goto out_unlock; - req = task->tk_rqstp; if (req == NULL) { xprt->snd_task = task; - return; + return true; } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; req->rq_bytes_sent = 0; req->rq_ntrans++; - return; + return true; } + return false; +} + +static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) +{ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + if (RPCXPRT_CONGESTED(xprt)) + goto out_unlock; + if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt)) + return; out_unlock: xprt_clear_locked(xprt); } -- 1.7.11.7