Re: [PATCH 1/2] NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE

Trond Myklebust <trondmy@xxxxxxxxxxxxxxx> · Fri, 17 Feb 2017 14:58:07 +0000

On Fri, 2017-02-17 at 09:46 -0500, Olga Kornievskaia wrote:
> On Thu, Feb 16, 2017 at 6:28 PM, Trond Myklebust
> <trondmy@xxxxxxxxxxxxxxx> wrote:
> > On Thu, 2017-02-16 at 17:14 -0500, Olga Kornievskaia wrote:
> > > On Thu, Feb 16, 2017 at 4:45 PM, Trond Myklebust
> > > <trondmy@xxxxxxxxxxxxxxx> wrote:
> > > > 
> > > > Olga, all your test is doing is showing that we hit the race
> > > > more
> > > > often. Fair enough, I’ll ask Anna to revert the patch. However
> > > > reverting the patch won’t prevent the server from returning
> > > > NFS4ERR_BAD_STATEID in any cases where the calls to
> > > > nfs4_set_rw_stateid() happen before state recovery. This is why
> > > > we
> > > > have the loop in nfs42_proc_copy().
> > > 
> > > I thought that if retry of the operation waits for the recovery
> > > to
> > > complete then nfs4_set_rw_stateid() will choose the correct
> > > stateid,
> > > is that not correct?
> > > 
> > > If that's not correct, then we somehow need to separate the case
> > > when
> > > BAD_STATEID should indeed mark the locks lost vs this case where
> > > the
> > > code erroneously used the bad stateid (oops) and it should really
> > > ignore this error. This really doesn't sound very plausible to
> > > accomplish.
> > 
> > Does this patch fix the problem?
> > 
> > 8<-------------------------------------------------------------
> > From bbae95e8f97cad6a91ca8caa50b61cae789632f9 Mon Sep 17 00:00:00
> > 2001
> > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> > Date: Thu, 16 Feb 2017 18:14:38 -0500
> > Subject: [PATCH] NFSv4: Fix reboot recovery in copy offload
> > 
> > Copy offload code needs to be hooked into the code for handling
> > NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field
> > in struct nfs4_exception.
> > 
> > Reported-by: Olga Kornievskaia <aglo@xxxxxxxxx>
> > Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation")
> > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> > ---
> >  fs/nfs/nfs42proc.c | 57 +++++++++++++++++++++++++++++++-----------
> > ------------
> >  1 file changed, 33 insertions(+), 24 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > index d12ff9385f49..baf1fe2dc296 100644
> > --- a/fs/nfs/nfs42proc.c
> > +++ b/fs/nfs/nfs42proc.c
> > @@ -128,20 +128,13 @@ int nfs42_proc_deallocate(struct file *filep,
> > loff_t offset, loff_t len)
> >         return err;
> >  }
> > 
> > -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
> > +static ssize_t _nfs42_proc_copy(struct file *src,
> >                                 struct nfs_lock_context *src_lock,
> > -                               struct file *dst, loff_t pos_dst,
> > +                               struct file *dst,
> >                                 struct nfs_lock_context *dst_lock,
> > -                               size_t count)
> > +                               struct nfs42_copy_args *args,
> > +                               struct nfs42_copy_res *res)
> >  {
> > -       struct nfs42_copy_args args = {
> > -               .src_fh         = NFS_FH(file_inode(src)),
> > -               .src_pos        = pos_src,
> > -               .dst_fh         = NFS_FH(file_inode(dst)),
> > -               .dst_pos        = pos_dst,
> > -               .count          = count,
> > -       };
> > -       struct nfs42_copy_res res;
> >         struct rpc_message msg = {
> >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
> >                 .rpc_argp = &args,
> > @@ -149,9 +142,12 @@ static ssize_t _nfs42_proc_copy(struct file
> > *src, loff_t pos_src,
> >         };
> >         struct inode *dst_inode = file_inode(dst);
> >         struct nfs_server *server = NFS_SERVER(dst_inode);
> > +       loff_t pos_src = args->src_pos;
> > +       loff_t pos_dst = args->dst_pos;
> > +       size_t count = args->count;
> >         int status;
> > 
> > -       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock-
> > >open_context,
> > +       status = nfs4_set_rw_stateid(&args->src_stateid, src_lock-
> > >open_context,
> >                                      src_lock, FMODE_READ);
> >         if (status)
> >                 return status;
> > @@ -161,7 +157,7 @@ static ssize_t _nfs42_proc_copy(struct file
> > *src, loff_t pos_src,
> >         if (status)
> >                 return status;
> > 
> > -       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock-
> > >open_context,
> > +       status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock-
> > >open_context,
> >                                      dst_lock, FMODE_WRITE);
> >         if (status)
> >                 return status;
> > @@ -171,22 +167,22 @@ static ssize_t _nfs42_proc_copy(struct file
> > *src, loff_t pos_src,
> >                 return status;
> > 
> >         status = nfs4_call_sync(server->client, server, &msg,
> > -                               &args.seq_args, &res.seq_res, 0);
> > +                               &args->seq_args, &res->seq_res, 0);
> >         if (status == -ENOTSUPP)
> >                 server->caps &= ~NFS_CAP_COPY;
> >         if (status)
> >                 return status;
> > 
> > -       if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
> > -               status = nfs_commit_file(dst,
> > &res.write_res.verifier.verifier);
> > +       if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
> > +               status = nfs_commit_file(dst, &res-
> > >write_res.verifier.verifier);
> >                 if (status)
> >                         return status;
> >         }
> > 
> >         truncate_pagecache_range(dst_inode, pos_dst,
> > -                                pos_dst + res.write_res.count);
> > +                                pos_dst + res->write_res.count);
> > 
> > -       return res.write_res.count;
> > +       return res->write_res.count;
> >  }
> > 
> >  ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
> > @@ -196,8 +192,22 @@ ssize_t nfs42_proc_copy(struct file *src,
> > loff_t pos_src,
> >         struct nfs_server *server = NFS_SERVER(file_inode(dst));
> >         struct nfs_lock_context *src_lock;
> >         struct nfs_lock_context *dst_lock;
> > -       struct nfs4_exception src_exception = { };
> > -       struct nfs4_exception dst_exception = { };
> > +       struct nfs42_copy_args args = {
> > +               .src_fh         = NFS_FH(file_inode(src)),
> > +               .src_pos        = pos_src,
> > +               .dst_fh         = NFS_FH(file_inode(dst)),
> > +               .dst_pos        = pos_dst,
> > +               .count          = count,
> > +       };
> > +       struct nfs42_copy_res res;
> > +       struct nfs4_exception src_exception = {
> > +               .inode          = file_inode(src),
> > +               .stateid        = &args.src_stateid,
> > +       };
> > +       struct nfs4_exception dst_exception = {
> > +               .inode          = file_inode(dst),
> > +               .stateid        = &args.dst_stateid,
> > +       };
> >         ssize_t err, err2;
> > 
> >         if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
> > @@ -207,7 +217,6 @@ ssize_t nfs42_proc_copy(struct file *src,
> > loff_t pos_src,
> >         if (IS_ERR(src_lock))
> >                 return PTR_ERR(src_lock);
> > 
> > -       src_exception.inode = file_inode(src);
> >         src_exception.state = src_lock->open_context->state;
> > 
> >         dst_lock =
> > nfs_get_lock_context(nfs_file_open_context(dst));
> > @@ -216,13 +225,13 @@ ssize_t nfs42_proc_copy(struct file *src,
> > loff_t pos_src,
> >                 goto out_put_src_lock;
> >         }
> > 
> > -       dst_exception.inode = file_inode(dst);
> >         dst_exception.state = dst_lock->open_context->state;
> > 
> >         do {
> >                 inode_lock(file_inode(dst));
> > -               err = _nfs42_proc_copy(src, pos_src, src_lock,
> > -                                      dst, pos_dst, dst_lock,
> > count);
> > +               err = _nfs42_proc_copy(src, src_lock,
> > +                               dst, dst_lock,
> > +                               &args, &res);
> >                 inode_unlock(file_inode(dst));
> > 
> >                 if (err == -ENOTSUPP) {
> > --
> > 2.9.3
> 
> I wish it did but no it does not.
> 

So what is still happening? With this patch, the error handler should
be able to distinguish between a stateid that is up to date and one
that isn't.

There might, however, still be a problem because we have 2 stateids,
meaning that one could be stale (and generating NFS4ERR_BAD_STATEID)
and the other one not. We might have to special case that, and do the
comparisons inside nfs42_proc_copy instead of using the generic code in
nfs4_handle_exception().

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@xxxxxxxxxxxxxxx
��.n��������+%������w��{.n�����{��w���jg��������ݢj����G�������j:+v���w�m������w�������h�����٥