If the server restarts at an awkward time it is possible for write requests to block waiting for the state manager to run. If the state manager isn't already running a new thread will need to be started which requires a GFP_KERNEL allocation (for do_fork). If memory is short, that GFP_KERNEL allocation could block on the writes going out via NFS, resulting in a deadlock. The easiest solution is to keep the manager thread running always. There are two interesting requirements for the manager thread: 1/ It must allow SIGKILL, which can abort NFS transactions to a dead server. 2/ It may continue running after the filesystem is unmounted, until the server recovers or the thread is SIGKILLed These, particularly the last, make it unsuitable for kthread_worker, which can only be killed synchronously (via kthread_stop()). In this patch the manager thread no longer holds a counted reference on the client. Instead a new flag NFS_CS_MANAGER indicates that the thread is running and so holds an implicit reference. nfs_put_client will only free the client if this flag is clear. If it is set, the manager must free the client. nn->nfs_client_lock is used to ensure nfs_put_client() and the thread don't trip over each other at shutdown. Signed-off-by: NeilBrown <neilb@xxxxxxx> diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 180d1ec9c32e..9973675737dd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -272,13 +272,16 @@ void nfs_put_client(struct nfs_client *clp) nn = net_generic(clp->cl_net, nfs_net_id); if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { + int manager_will_free = + test_bit(NFS_CS_MANAGER, &clp->cl_res_state); list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); - spin_unlock(&nn->nfs_client_lock); - + if (manager_will_free) + nfs4_schedule_state_manager(clp); WARN_ON_ONCE(!list_empty(&clp->cl_superblocks)); - - clp->rpc_ops->free_client(clp); + spin_unlock(&nn->nfs_client_lock); + if (!manager_will_free) + clp->rpc_ops->free_client(clp); } } EXPORT_SYMBOL_GPL(nfs_put_client); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba2affa51941..014e730ee7a2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -432,6 +432,7 @@ extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern int nfs4_wait_clnt_recover(struct nfs_client *clp); extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); +extern int nfs4_start_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern int nfs4_schedule_migration_recovery(const struct nfs_server *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index aa9ef4876046..907111174886 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -401,6 +401,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, } __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + error = nfs4_start_state_manager(clp); + if (error < 0) + goto error; + __set_bit(NFS_CS_MANAGER, &clp->cl_res_state); + error = nfs4_init_client_minor_version(clp); if (error < 0) goto error; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 42f121182167..69d12decd04a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1150,15 +1150,12 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp) /* * Schedule the nfs_client asynchronous state management routine */ -void nfs4_schedule_state_manager(struct nfs_client *clp) +int nfs4_start_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; - if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) - return; __module_get(THIS_MODULE); - atomic_inc(&clp->cl_count); /* The rcu_read_lock() is not strictly necessary, as the state * manager is the only thread that ever changes the rpc_xprt @@ -1171,10 +1168,18 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); - nfs4_clear_state_manager_bit(clp); - nfs_put_client(clp); module_put(THIS_MODULE); + return PTR_ERR(task); } + return 0; +} + +void nfs4_schedule_state_manager(struct nfs_client *clp) +{ + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + smp_mb__after_atomic(); + wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); } /* @@ -2328,8 +2333,12 @@ static void nfs4_state_manager(struct nfs_client *clp) int status = 0; const char *section = "", *section_sep = ""; - /* Ensure exclusive access to NFSv4 state */ - do { + while (atomic_read(&clp->cl_count) && clp->cl_state) { + /* If we found more work to do, we must ensure the + * bit is set so other code can wait for it. + */ + set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { section = "purge state"; status = nfs4_purge_lease(clp); @@ -2423,12 +2432,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } nfs4_clear_state_manager_bit(clp); - /* Did we race with an attempt to give us more work? */ - if (clp->cl_state == 0) - break; - if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) - break; - } while (atomic_read(&clp->cl_count) > 1); + } return; out_error: if (strlen(section)) @@ -2444,10 +2448,23 @@ out_error: static int nfs4_run_state_manager(void *ptr) { struct nfs_client *clp = ptr; + struct nfs_net *nn; allow_signal(SIGKILL); - nfs4_state_manager(clp); - nfs_put_client(clp); + while (atomic_read(&clp->cl_count)) { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible( + *bit_waitqueue(&clp->cl_state, + NFS4CLNT_MANAGER_RUNNING), + test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)); + nfs4_state_manager(clp); + } + nn = net_generic(clp->cl_net, nfs_net_id); + spin_lock(&nn->nfs_client_lock); + /* nfs_put_client has definitely stopped using its reference */ + spin_unlock(&nn->nfs_client_lock); + clp->rpc_ops->free_client(clp); module_put_and_exit(0); return 0; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1150ea41b626..40c7d1a53388 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -36,6 +36,7 @@ struct nfs_client { #define NFS_CS_RENEWD 3 /* - renewd started */ #define NFS_CS_STOP_RENEW 4 /* no more state to renew */ #define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */ +#define NFS_CS_MANAGER 6 /* NFSv4 manager thread running */ unsigned long cl_flags; /* behavior switches */ #define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ #define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
Attachment:
signature.asc
Description: PGP signature