Re: [PATCH 2/2] sunrpc: allow svc threads to fail initialisation cleanly

> On Sep 11, 2024, at 2:33 PM, Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> 
> On Tue, 2024-07-30 at 07:19 +1000, NeilBrown wrote:
>> If an svc thread needs to perform some initialisation that might fail,
>> it has no good way to handle the failure.
>> 
>> Before the thread can exit it must call svc_exit_thread(), but that
>> requires the service mutex to be held.  The thread cannot simply take
>> the mutex as that could deadlock if there is a concurrent attempt to
>> shut down all threads (which is unlikely, but not impossible).
>> 
>> nfsd currently calls svc_exit_thread() unprotected in the unlikely event
>> that unshare_fs_struct() fails.
>> 
>> We can clean this up by introducing svc_thread_init_status() by which an
>> svc thread can report whether initialisation has succeeded.  If it has,
>> it continues normally into the action loop.  If it has not,
>> svc_thread_init_status() immediately aborts the thread.
>> svc_start_kthreads() waits for either of these to happen, and calls
>> svc_exit_thread() (under the mutex) if the thread aborted.
>> 
>> Signed-off-by: NeilBrown <neilb@xxxxxxx>
>> ---
>> fs/lockd/svc.c             |  2 ++
>> fs/nfs/callback.c          |  2 ++
>> fs/nfsd/nfssvc.c           |  9 +++------
>> include/linux/sunrpc/svc.h | 28 ++++++++++++++++++++++++++++
>> net/sunrpc/svc.c           | 11 +++++++++++
>> 5 files changed, 46 insertions(+), 6 deletions(-)
>> 
>> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
>> index 71713309967d..4ec22c2f2ea3 100644
>> --- a/fs/lockd/svc.c
>> +++ b/fs/lockd/svc.c
>> @@ -124,6 +124,8 @@ lockd(void *vrqstp)
>> struct net *net = &init_net;
>> struct lockd_net *ln = net_generic(net, lockd_net_id);
>> 
>> + svc_thread_init_status(rqstp, 0);
>> +
>> /* try_to_freeze() is called from svc_recv() */
>> set_freezable();
>> 
>> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
>> index 8adfcd4c8c1a..6cf92498a5ac 100644
>> --- a/fs/nfs/callback.c
>> +++ b/fs/nfs/callback.c
>> @@ -76,6 +76,8 @@ nfs4_callback_svc(void *vrqstp)
>> {
>> struct svc_rqst *rqstp = vrqstp;
>> 
>> + svc_thread_init_status(rqstp, 0);
>> +
>> set_freezable();
>> 
>> while (!svc_thread_should_stop(rqstp))
>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>> index fca340b3ee91..1cef09a3c78e 100644
>> --- a/fs/nfsd/nfssvc.c
>> +++ b/fs/nfsd/nfssvc.c
>> @@ -875,11 +875,9 @@ nfsd(void *vrqstp)
>> 
>> /* At this point, the thread shares current->fs
>>  * with the init process. We need to create files with the
>> -  * umask as defined by the client instead of init's umask. */
>> - if (unshare_fs_struct() < 0) {
>> - printk("Unable to start nfsd thread: out of memory\n");
>> - goto out;
>> - }
>> +  * umask as defined by the client instead of init's umask.
>> +  */
>> + svc_thread_init_status(rqstp, unshare_fs_struct());
>> 
>> current->fs->umask = 0;
>> 
>> @@ -901,7 +899,6 @@ nfsd(void *vrqstp)
>> 
>> atomic_dec(&nfsd_th_cnt);
>> 
>> -out:
>> /* Release the thread */
>> svc_exit_thread(rqstp);
>> return 0;
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index 99e9345d829e..437672bcaa22 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -21,6 +21,7 @@
>> #include <linux/wait.h>
>> #include <linux/mm.h>
>> #include <linux/pagevec.h>
>> +#include <linux/kthread.h>
>> 
>> /*
>>  *
>> @@ -232,6 +233,11 @@ struct svc_rqst {
>> struct net *rq_bc_net; /* pointer to backchannel's
>>  * net namespace
>>  */
>> +
>> + int rq_err; /* Thread sets this to indicate
>> +  * initialisation success.
>> +  */
>> +
>> unsigned long bc_to_initval;
>> unsigned int bc_to_retries;
>> void ** rq_lease_breaker; /* The v4 client breaking a lease */
>> @@ -305,6 +311,28 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
>> return test_bit(RQ_VICTIM, &rqstp->rq_flags);
>> }
>> 
>> +/**
>> + * svc_thread_init_status - report whether thread has initialised successfully
>> + * @rqstp: the thread in question
>> + * @err: errno code
>> + *
>> + * After performing any initialisation that could fail, and before starting
>> + * normal work, each sunrpc svc_thread must call svc_thread_init_status()
>> + * with an appropriate error, or zero.
>> + *
>> + * If zero is passed, the thread is ready and must continue until
>> + * svc_thread_should_stop() returns true.  If a non-zero error is passed
>> + * the call will not return - the thread will exit.
>> + */
>> +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
>> +{
>> + /* store_release ensures svc_start_kthreads() sees the error */
>> + smp_store_release(&rqstp->rq_err, err);
>> + wake_up_var(&rqstp->rq_err);
>> + if (err)
>> + kthread_exit(1);
>> +}
>> +
>> struct svc_deferred_req {
>> u32 prot; /* protocol (UDP or TCP) */
>> struct svc_xprt *xprt;
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index ae31ffea7a14..1e80fa67d8b7 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -706,6 +706,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>> if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
>> goto out_enomem;
>> 
>> + rqstp->rq_err = -EAGAIN; /* No error yet */
>> +
>> serv->sv_nrthreads += 1;
>> pool->sp_nrthreads += 1;
>> 
>> @@ -792,6 +794,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>> struct svc_pool *chosen_pool;
>> unsigned int state = serv->sv_nrthreads-1;
>> int node;
>> + int err;
>> 
>> do {
>> nrservs--;
>> @@ -814,6 +817,14 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>> 
>> svc_sock_update_bufs(serv);
>> wake_up_process(task);
>> +
>> + /* load_acquire ensures we get value stored in svc_thread_init_status() */
>> + wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
>> + err = rqstp->rq_err;
>> + if (err) {
>> + svc_exit_thread(rqstp);
>> + return err;
>> + }
>> } while (nrservs > 0);
>> 
>> return 0;
> 
> 
> I hit a hang today on the client while running the nfs-interop test
> under kdevops. The client is stuck in the mount() syscall while trying
> to set up the backchannel:
> 
> [ 1693.653257] INFO: task mount.nfs:13243 blocked for more than 120 seconds.
> [ 1693.655827]       Not tainted 6.11.0-rc7-gffcadb41b696 #166
> [ 1693.657858] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 1693.661442] task:mount.nfs       state:D stack:0     pid:13243 tgid:13243 ppid:13242  flags:0x00004002
> [ 1693.664648] Call Trace:
> [ 1693.665639]  <TASK>
> [ 1693.666482]  __schedule+0xc04/0x2750
> [ 1693.668021]  ? __pfx___schedule+0x10/0x10
> [ 1693.669562]  ? _raw_spin_lock_irqsave+0x98/0xf0
> [ 1693.671213]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
> [ 1693.673109]  ? try_to_wake_up+0x141/0x1210
> [ 1693.674763]  ? __pfx_try_to_wake_up+0x10/0x10
> [ 1693.676612]  ? _raw_spin_unlock_irqrestore+0x23/0x40
> [ 1693.678429]  schedule+0x73/0x2f0
> [ 1693.679662]  svc_set_num_threads+0xbc8/0xf80 [sunrpc]
> [ 1693.682007]  ? __pfx_svc_set_num_threads+0x10/0x10 [sunrpc]
> [ 1693.684163]  ? __pfx_var_wake_function+0x10/0x10
> [ 1693.685849]  ? __svc_create+0x6c0/0x980 [sunrpc]
> [ 1693.687777]  nfs_callback_up+0x314/0xae0 [nfsv4]
> [ 1693.689630]  nfs4_init_client+0x203/0x400 [nfsv4]
> [ 1693.691468]  ? __pfx_nfs4_init_client+0x10/0x10 [nfsv4]
> [ 1693.693502]  ? _raw_spin_lock_irqsave+0x98/0xf0
> [ 1693.695141]  nfs4_set_client+0x2f4/0x520 [nfsv4]
> [ 1693.696967]  ? __pfx_nfs4_set_client+0x10/0x10 [nfsv4]
> [ 1693.699230]  nfs4_create_server+0x5f2/0xef0 [nfsv4]
> [ 1693.701357]  ? _raw_spin_lock+0x85/0xe0
> [ 1693.702758]  ? __pfx__raw_spin_lock+0x10/0x10
> [ 1693.704344]  ? nfs_get_tree+0x61f/0x16a0 [nfs]
> [ 1693.706160]  ? __pfx_nfs4_create_server+0x10/0x10 [nfsv4]
> [ 1693.707376]  ? __module_get+0x26/0xc0
> [ 1693.708061]  nfs4_try_get_tree+0xcd/0x250 [nfsv4]
> [ 1693.708893]  vfs_get_tree+0x83/0x2d0
> [ 1693.709534]  path_mount+0x88d/0x19a0
> [ 1693.710100]  ? __pfx_path_mount+0x10/0x10
> [ 1693.710718]  ? user_path_at+0xa4/0xe0
> [ 1693.711303]  ? kmem_cache_free+0x143/0x3e0
> [ 1693.711936]  __x64_sys_mount+0x1fb/0x270
> [ 1693.712606]  ? __pfx___x64_sys_mount+0x10/0x10
> [ 1693.713315]  do_syscall_64+0x4b/0x110
> [ 1693.713889]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [ 1693.714660] RIP: 0033:0x7f05050418fe
> [ 1693.715233] RSP: 002b:00007fff4e0f5728 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
> [ 1693.716417] RAX: ffffffffffffffda RBX: 00007fff4e0f5900 RCX: 00007f05050418fe
> [ 1693.717492] RDX: 00005559cea3ea70 RSI: 00005559cea3fe60 RDI: 00005559cea40140
> [ 1693.718558] RBP: 00007fff4e0f5760 R08: 00005559cea41e60 R09: 00007f050510cb20
> [ 1693.719620] R10: 0000000000000000 R11: 0000000000000246 R12: 00005559cea41e60
> [ 1693.720735] R13: 00007fff4e0f5900 R14: 00005559cea41df0 R15: 0000000000000004
> 
> 
> Looking at faddr2line:
> 
> $ ./scripts/faddr2line --list net/sunrpc/sunrpc.ko svc_set_num_threads+0xbc8/0xf80
> svc_set_num_threads+0xbc8/0xf80:
> 
> svc_start_kthreads at /home/jlayton/git/kdevops/linux/net/sunrpc/svc.c:822 (discriminator 17)
> 817 
> 818                    svc_sock_update_bufs(serv);
> 819                    wake_up_process(task);
> 820 
> 821                    /* load_acquire ensures we get value stored in svc_thread_init_status() */
>> 822<                   wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
> 823                    err = rqstp->rq_err;
> 824                    if (err) {
> 825                            svc_exit_thread(rqstp);
> 826                            return err;
> 827                    }
> 
> (inlined by) svc_set_num_threads at /home/jlayton/git/kdevops/linux/net/sunrpc/svc.c:877 (discriminator 17)
> 872                    nrservs -= serv->sv_nrthreads;
> 873            else
> 874                    nrservs -= pool->sp_nrthreads;
> 875 
> 876            if (nrservs > 0)
>> 877<                   return svc_start_kthreads(serv, pool, nrservs);
> 878            if (nrservs < 0)
> 879                    return svc_stop_kthreads(serv, pool, nrservs);
> 880            return 0;
> 881    }
> 882    EXPORT_SYMBOL_GPL(svc_set_num_threads);
> 
> It looks like the callback thread started up properly and is past the svc_thread_init_status call.
> 
> $ sudo cat /proc/13246/stack
> [<0>] svc_recv+0xcef/0x2020 [sunrpc]
> [<0>] nfs4_callback_svc+0xb0/0x140 [nfsv4]
> [<0>] kthread+0x29b/0x360
> [<0>] ret_from_fork+0x30/0x70
> [<0>] ret_from_fork_asm+0x1a/0x30
> 
> ...so the status should have been updated properly. Barrier problem?

Speaking as NFSD release manager: I don't intend to send the
NFSD v6.12 PR until week of September 23rd.

I will either drop these patches before sending my PR, or I
can squash in a fix if one is found.
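
For anyone following along, the handshake being debugged reduces to the
pairing below. This is condensed from the patch above purely for
illustration (context trimmed and simplified; it is not a proposed fix):

    /* Parent, in svc_start_kthreads(), after wake_up_process(task): */
    wait_var_event(&rqstp->rq_err,
                   smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
    err = rqstp->rq_err;
    if (err) {
            svc_exit_thread(rqstp);   /* per the commit message, the mutex is held here */
            return err;
    }

    /* New svc thread, after any initialisation that can fail: */
    static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
    {
            /* pairs with the load_acquire in the waiter above */
            smp_store_release(&rqstp->rq_err, err);
            wake_up_var(&rqstp->rq_err);
            if (err)
                    kthread_exit(1);  /* the parent then calls svc_exit_thread() */
    }

svc_prepare_thread() pre-sets rq_err to -EAGAIN, so the waiter only
proceeds once the new thread has stored its real status.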


--
Chuck Lever