On Sun, 12 Jul 2020, Chuck Lever wrote: > Hi Scott- > > > On Jul 10, 2020, at 4:33 PM, Scott Mayhew <smayhew@xxxxxxxxxx> wrote: > > > > If the rpc_pipefs is unmounted, then the rpc_pipe->dentry becomes NULL > > and dereferencing the dentry->d_sb will trigger an oops. The only > > reason we're doing that is to determine the nfsd_net, which could > > instead be passed in by the caller. So do that instead. > > > > Signed-off-by: Scott Mayhew <smayhew@xxxxxxxxxx> > > Looks straightforward. Applied to the nfsd-5.9 testing tree. > > I'm wondering about automatic backports to stable. This fix does not > apply to kernels before v5.6, but IIUC addresses a crash introduced > in f3f8014862d8 ("nfsd: add the infrastructure to handle the cld upcall") > [2012] ? > > Is "Cc: <stable@xxxxxxxxxx> # v5.6+" appropriate? > I think it would be 11a60d159259 ("nfsd: add a "GetVersion" upcall for nfsdcld"), so it would only need to go back to v5.4... and to get the patch to apply, in the hunk that modifies nfsd4_cld_grace_done_v0() you would need to add a cast of nn->boot_time to int64_t (which 9cc7680149b2 "nfsd: make 'boot_time' 64-bit wide" removed). > Also, is there a bug report that documents the crash? Our QE hit the crash internally while running xfstests on a pNFS SCSI setup. I don't think either of those is relevant aside from the fact that several nfsd threads where stuck on xfs while calling nfsd4_scsi_proc_layoutcommit(). I think what happened is that the job timed out and the system was in the process of being shut down when the oops occurred, and the rpc_pipefs was unmounted out from under nfsd: crash> bt PID: 39572 TASK: ffff8a78f67fddc0 CPU: 1 COMMAND: "kworker/u4:0" #0 [ffffa39a026b3ae0] machine_kexec at ffffffffae65b55e #1 [ffffa39a026b3b38] __crash_kexec at ffffffffae75ea5d #2 [ffffa39a026b3c00] crash_kexec at ffffffffae75f93d #3 [ffffa39a026b3c18] oops_end at ffffffffae622c4d #4 [ffffa39a026b3c38] no_context at ffffffffae66aa2e #5 [ffffa39a026b3c90] do_page_fault at ffffffffae66b552 #6 [ffffa39a026b3cc0] async_page_fault at ffffffffaf00123e [exception RIP: __cld_pipe_upcall+0x3d] RIP: ffffffffc06b5ded RSP: ffffa39a026b3d70 RFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8a78ceaf7000 RCX: 000000000000000b RDX: ffff8a78ceaf7000 RSI: ffffa39a026b3d70 RDI: ffffa39a026b3d70 RBP: ffffa39a026b3dc0 R8: ffff8a78f37aac50 R9: ffff8a78c7c02800 R10: 8080808080808080 R11: 0000ec193f7575c0 R12: ffff8a78c8ef3038 R13: ffff8a78d3156220 R14: ffffa39a026b3e50 R15: ffff8a78d3156220 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffffa39a026b3dc8] nfsd4_cld_remove at ffffffffc06b7b70 [nfsd] #8 [ffffa39a026b3df8] expire_client at ffffffffc06aa4d6 [nfsd] #9 [ffffa39a026b3e08] laundromat_main at ffffffffc06aa799 [nfsd] #10 [ffffa39a026b3e98] process_one_work at ffffffffae6d1cf7 #11 [ffffa39a026b3ed8] worker_thread at ffffffffae6d23c0 #12 [ffffa39a026b3f10] kthread at ffffffffae6d7d02 #13 [ffffa39a026b3f50] ret_from_fork at ffffffffaf000255 crash> dis -lr __cld_pipe_upcall+0x3d ... /usr/src/debug/kernel-4.18.0-211.el8/linux-4.18.0-211.el8.x86_64/fs/nfsd/nfs4recover.c: 764 0xffffffffc06b5de0 <__cld_pipe_upcall+0x30>: mov 0x108(%rdi),%rax 0xffffffffc06b5de7 <__cld_pipe_upcall+0x37>: mov %rsp,%rsi 0xffffffffc06b5dea <__cld_pipe_upcall+0x3a>: mov %rsi,%rdi 0xffffffffc06b5ded <__cld_pipe_upcall+0x3d>: mov 0x68(%rax),%rax That puts us here: static int __cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) { int ret; struct rpc_pipe_msg msg; struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u); struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info, nfsd_net_id); ... crash> rpc_pipe.dentry -o struct rpc_pipe { [0x108] struct dentry *dentry; } crash> dentry.d_sb -o struct dentry { [0x68] struct super_block *d_sb; } If the rpc_pipe->dentry was NULL, that would explain the NULL pointer dereference at 0000000000000068. Let's confirm. crash> dis -lr ffffffffc06b7b70 ... /usr/src/debug/kernel-4.18.0-211.el8/linux-4.18.0-211.el8.x86_64/fs/nfsd/nfs4recover.c: 794 0xffffffffc06b7b65 <nfsd4_cld_remove+0x85>: mov %rbp,%rsi 0xffffffffc06b7b68 <nfsd4_cld_remove+0x88>: mov %rbx,%rdi <------------------------------rpc_pipe 0xffffffffc06b7b6b <nfsd4_cld_remove+0x8b>: callq 0xffffffffc06b5db0 <__cld_pipe_upcall> /usr/src/debug/kernel-4.18.0-211.el8/linux-4.18.0-211.el8.x86_64/fs/nfsd/nfs4recover.c: 795 0xffffffffc06b7b70 <nfsd4_cld_remove+0x90>: cmp $0xfffffff5,%eax crash> rpc_pipe.dentry ffff8a78ceaf7000 dentry = 0x0 We can also see that the rpc_pipefs isn't mounted: crash> mount|grep pipefs (nothing) And none of the daemons that require the rpc_pipefs are running: crash> ps|grep nfsdcld crash> ps|grep idmapd crash> ps|grep gssd crash> ps|grep blkmapd (nothing) I think part of the problem is that the nfsdcld.service unit file has "Requires=rpc_pipefs.target", but the nfs-server.service unit file only has "Wants=nfsdcld.service". It can't be "Requires" because using the nfsdcld daemon for client tracking is optional. -Scott > > > > --- > > fs/nfsd/nfs4recover.c | 24 +++++++++++------------- > > 1 file changed, 11 insertions(+), 13 deletions(-) > > > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > > index 9e40dfecf1b1..186fa2c2c6ba 100644 > > --- a/fs/nfsd/nfs4recover.c > > +++ b/fs/nfsd/nfs4recover.c > > @@ -747,13 +747,11 @@ struct cld_upcall { > > }; > > > > static int > > -__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) > > +__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) > > { > > int ret; > > struct rpc_pipe_msg msg; > > struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u); > > - struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info, > > - nfsd_net_id); > > > > memset(&msg, 0, sizeof(msg)); > > msg.data = cmsg; > > @@ -773,7 +771,7 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) > > } > > > > static int > > -cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) > > +cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) > > { > > int ret; > > > > @@ -782,7 +780,7 @@ cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) > > * upcalls queued. > > */ > > do { > > - ret = __cld_pipe_upcall(pipe, cmsg); > > + ret = __cld_pipe_upcall(pipe, cmsg, nn); > > } while (ret == -EAGAIN); > > > > return ret; > > @@ -1115,7 +1113,7 @@ nfsd4_cld_create(struct nfs4_client *clp) > > memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, > > clp->cl_name.len); > > > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) { > > ret = cup->cu_u.cu_msg.cm_status; > > set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); > > @@ -1180,7 +1178,7 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) > > } else > > cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; > > > > - ret = cld_pipe_upcall(cn->cn_pipe, cmsg); > > + ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn); > > if (!ret) { > > ret = cmsg->cm_status; > > set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); > > @@ -1218,7 +1216,7 @@ nfsd4_cld_remove(struct nfs4_client *clp) > > memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, > > clp->cl_name.len); > > > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) { > > ret = cup->cu_u.cu_msg.cm_status; > > clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); > > @@ -1261,7 +1259,7 @@ nfsd4_cld_check_v0(struct nfs4_client *clp) > > memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, > > clp->cl_name.len); > > > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) { > > ret = cup->cu_u.cu_msg.cm_status; > > set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); > > @@ -1404,7 +1402,7 @@ nfsd4_cld_grace_start(struct nfsd_net *nn) > > } > > > > cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart; > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) > > ret = cup->cu_u.cu_msg.cm_status; > > > > @@ -1432,7 +1430,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn) > > > > cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; > > cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) > > ret = cup->cu_u.cu_msg.cm_status; > > > > @@ -1460,7 +1458,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn) > > } > > > > cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) > > ret = cup->cu_u.cu_msg.cm_status; > > > > @@ -1524,7 +1522,7 @@ nfsd4_cld_get_version(struct nfsd_net *nn) > > goto out_err; > > } > > cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion; > > - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); > > + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); > > if (!ret) { > > ret = cup->cu_u.cu_msg.cm_status; > > if (ret) > > -- > > 2.25.4 > > > > -- > Chuck Lever > > >