Re: [PATCH] nfsd: avoid a NULL dereference in __cld_pipe_upcall()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, 12 Jul 2020, Chuck Lever wrote:

> Hi Scott-
> 
> > On Jul 10, 2020, at 4:33 PM, Scott Mayhew <smayhew@xxxxxxxxxx> wrote:
> > 
> > If the rpc_pipefs is unmounted, then the rpc_pipe->dentry becomes NULL
> > and dereferencing the dentry->d_sb will trigger an oops.  The only
> > reason we're doing that is to determine the nfsd_net, which could
> > instead be passed in by the caller.  So do that instead.
> > 
> > Signed-off-by: Scott Mayhew <smayhew@xxxxxxxxxx>
> 
> Looks straightforward. Applied to the nfsd-5.9 testing tree.
> 
> I'm wondering about automatic backports to stable. This fix does not
> apply to kernels before v5.6, but IIUC addresses a crash introduced
> in f3f8014862d8 ("nfsd: add the infrastructure to handle the cld upcall")
> [2012] ?
> 
> Is "Cc: <stable@xxxxxxxxxx> # v5.6+" appropriate?
>
I think it would be 11a60d159259 ("nfsd: add a "GetVersion" upcall for
nfsdcld"), so it would only need to go back to v5.4... and to get the
patch to apply, in the hunk that modifies nfsd4_cld_grace_done_v0()
you would need to add a cast of nn->boot_time to int64_t (which
9cc7680149b2 "nfsd: make 'boot_time' 64-bit wide" removed).
 
> Also, is there a bug report that documents the crash?

Our QE hit the crash internally while running xfstests on a pNFS SCSI
setup.  I don't think either of those is relevant aside from the fact
that several nfsd threads where stuck on xfs while calling
nfsd4_scsi_proc_layoutcommit().  I think what happened is that the job
timed out and the system was in the process of being shut down when the
oops occurred, and the rpc_pipefs was unmounted out from under nfsd:

crash> bt
PID: 39572  TASK: ffff8a78f67fddc0  CPU: 1   COMMAND: "kworker/u4:0"
 #0 [ffffa39a026b3ae0] machine_kexec at ffffffffae65b55e
 #1 [ffffa39a026b3b38] __crash_kexec at ffffffffae75ea5d
 #2 [ffffa39a026b3c00] crash_kexec at ffffffffae75f93d
 #3 [ffffa39a026b3c18] oops_end at ffffffffae622c4d
 #4 [ffffa39a026b3c38] no_context at ffffffffae66aa2e
 #5 [ffffa39a026b3c90] do_page_fault at ffffffffae66b552
 #6 [ffffa39a026b3cc0] async_page_fault at ffffffffaf00123e
    [exception RIP: __cld_pipe_upcall+0x3d]
    RIP: ffffffffc06b5ded  RSP: ffffa39a026b3d70  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: ffff8a78ceaf7000  RCX: 000000000000000b
    RDX: ffff8a78ceaf7000  RSI: ffffa39a026b3d70  RDI: ffffa39a026b3d70
    RBP: ffffa39a026b3dc0   R8: ffff8a78f37aac50   R9: ffff8a78c7c02800
    R10: 8080808080808080  R11: 0000ec193f7575c0  R12: ffff8a78c8ef3038
    R13: ffff8a78d3156220  R14: ffffa39a026b3e50  R15: ffff8a78d3156220
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #7 [ffffa39a026b3dc8] nfsd4_cld_remove at ffffffffc06b7b70 [nfsd]
 #8 [ffffa39a026b3df8] expire_client at ffffffffc06aa4d6 [nfsd]
 #9 [ffffa39a026b3e08] laundromat_main at ffffffffc06aa799 [nfsd]
#10 [ffffa39a026b3e98] process_one_work at ffffffffae6d1cf7
#11 [ffffa39a026b3ed8] worker_thread at ffffffffae6d23c0
#12 [ffffa39a026b3f10] kthread at ffffffffae6d7d02
#13 [ffffa39a026b3f50] ret_from_fork at ffffffffaf000255

crash> dis -lr __cld_pipe_upcall+0x3d
...
/usr/src/debug/kernel-4.18.0-211.el8/linux-4.18.0-211.el8.x86_64/fs/nfsd/nfs4recover.c: 764
0xffffffffc06b5de0 <__cld_pipe_upcall+0x30>:    mov    0x108(%rdi),%rax
0xffffffffc06b5de7 <__cld_pipe_upcall+0x37>:    mov    %rsp,%rsi
0xffffffffc06b5dea <__cld_pipe_upcall+0x3a>:    mov    %rsi,%rdi
0xffffffffc06b5ded <__cld_pipe_upcall+0x3d>:    mov    0x68(%rax),%rax

That puts us here:

static int
__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
{
        int ret;
        struct rpc_pipe_msg msg;
        struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
        struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
                                          nfsd_net_id);
...
crash> rpc_pipe.dentry -o
struct rpc_pipe {
  [0x108] struct dentry *dentry;
}
crash> dentry.d_sb -o
struct dentry {
  [0x68] struct super_block *d_sb;
}

If the rpc_pipe->dentry was NULL, that would explain the NULL pointer dereference at 0000000000000068.  Let's confirm.

crash> dis -lr ffffffffc06b7b70
...
/usr/src/debug/kernel-4.18.0-211.el8/linux-4.18.0-211.el8.x86_64/fs/nfsd/nfs4recover.c: 794
0xffffffffc06b7b65 <nfsd4_cld_remove+0x85>:     mov    %rbp,%rsi
0xffffffffc06b7b68 <nfsd4_cld_remove+0x88>:     mov    %rbx,%rdi <------------------------------rpc_pipe
0xffffffffc06b7b6b <nfsd4_cld_remove+0x8b>:     callq  0xffffffffc06b5db0 <__cld_pipe_upcall>
/usr/src/debug/kernel-4.18.0-211.el8/linux-4.18.0-211.el8.x86_64/fs/nfsd/nfs4recover.c: 795
0xffffffffc06b7b70 <nfsd4_cld_remove+0x90>:     cmp    $0xfffffff5,%eax

crash> rpc_pipe.dentry ffff8a78ceaf7000
  dentry = 0x0

We can also see that the rpc_pipefs isn't mounted:

crash> mount|grep pipefs
(nothing)

And none of the daemons that require the rpc_pipefs are running:

crash> ps|grep nfsdcld
crash> ps|grep idmapd
crash> ps|grep gssd
crash> ps|grep blkmapd
(nothing)

I think part of the problem is that the nfsdcld.service unit file has
"Requires=rpc_pipefs.target", but the nfs-server.service unit file only
has "Wants=nfsdcld.service".  It can't be "Requires" because using the
nfsdcld daemon for client tracking is optional.
 
-Scott

> 
> 
> > ---
> > fs/nfsd/nfs4recover.c | 24 +++++++++++-------------
> > 1 file changed, 11 insertions(+), 13 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> > index 9e40dfecf1b1..186fa2c2c6ba 100644
> > --- a/fs/nfsd/nfs4recover.c
> > +++ b/fs/nfsd/nfs4recover.c
> > @@ -747,13 +747,11 @@ struct cld_upcall {
> > };
> > 
> > static int
> > -__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
> > +__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn)
> > {
> > 	int ret;
> > 	struct rpc_pipe_msg msg;
> > 	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
> > -	struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
> > -					  nfsd_net_id);
> > 
> > 	memset(&msg, 0, sizeof(msg));
> > 	msg.data = cmsg;
> > @@ -773,7 +771,7 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
> > }
> > 
> > static int
> > -cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
> > +cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn)
> > {
> > 	int ret;
> > 
> > @@ -782,7 +780,7 @@ cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
> > 	 *  upcalls queued.
> > 	 */
> > 	do {
> > -		ret = __cld_pipe_upcall(pipe, cmsg);
> > +		ret = __cld_pipe_upcall(pipe, cmsg, nn);
> > 	} while (ret == -EAGAIN);
> > 
> > 	return ret;
> > @@ -1115,7 +1113,7 @@ nfsd4_cld_create(struct nfs4_client *clp)
> > 	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> > 			clp->cl_name.len);
> > 
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret) {
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> > @@ -1180,7 +1178,7 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
> > 	} else
> > 		cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
> > 
> > -	ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn);
> > 	if (!ret) {
> > 		ret = cmsg->cm_status;
> > 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> > @@ -1218,7 +1216,7 @@ nfsd4_cld_remove(struct nfs4_client *clp)
> > 	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> > 			clp->cl_name.len);
> > 
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret) {
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 		clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> > @@ -1261,7 +1259,7 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
> > 	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
> > 			clp->cl_name.len);
> > 
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret) {
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
> > @@ -1404,7 +1402,7 @@ nfsd4_cld_grace_start(struct nfsd_net *nn)
> > 	}
> > 
> > 	cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret)
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 
> > @@ -1432,7 +1430,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
> > 
> > 	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
> > 	cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time;
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret)
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 
> > @@ -1460,7 +1458,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
> > 	}
> > 
> > 	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret)
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 
> > @@ -1524,7 +1522,7 @@ nfsd4_cld_get_version(struct nfsd_net *nn)
> > 		goto out_err;
> > 	}
> > 	cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
> > -	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
> > +	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
> > 	if (!ret) {
> > 		ret = cup->cu_u.cu_msg.cm_status;
> > 		if (ret)
> > -- 
> > 2.25.4
> > 
> 
> --
> Chuck Lever
> 
> 
> 





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux