On Fri, 2017-02-17 at 13:46 -0500, Benjamin Coddington wrote: > NFS attempts to wait for read and write completion before unlocking > in > order to ensure that the data returned was protected by the > lock. When > this waiting is interrupted by a signal, the unlock may never be > sent, and > messages similar to the following are seen in the kernel ring buffer: > > [20.167876] Leaked locks on dev=0x0:0x2b ino=0x8dd4c3: > [20.168286] POSIX: fl_owner=ffff880078b06940 fl_flags=0x1 fl_type=0x0 > fl_pid=20183 > [20.168727] POSIX: fl_owner=ffff880078b06680 fl_flags=0x1 fl_type=0x0 > fl_pid=20185 > > For NFSv3, the missing unlock will cause the server to refuse > conflicting > locks indefinitely. For NFSv4, the leftover lock will be removed by > the > server after the lease timeout. > > This patch fixes this for NFSv3 by skipping the wait in order to > immediately send the unlock if the FL_CLOSE flag is set when > signaled. For > NFSv4, this approach may cause the server to see the I/O as arriving > with > an old stateid, so, for the v4 case the fix is different: the wait on > I/O > completion is moved into nfs4_locku_ops' rpc_call_prepare(). This > will > cause the sleep to happen in rpciod context, and a signal to the > originally > waiting process will not cause the unlock to be skipped. NACK. I/O waits in rpciod contexts are NOT acceptable. rpciod is part of the memory reclaim chain, so having it sleep on I/O is deadlock prone. Why is there a need to wait for I/O completion in the first place if the user has killed the task that held the lock? 'kill -9' will cause corruption; that's a fact that no amount of paper will cover over. 
> > Signed-off-by: Benjamin Coddington <bcodding@xxxxxxxxxx> > --- > fs/nfs/file.c | 13 ------------- > fs/nfs/nfs3proc.c | 13 +++++++++++++ > fs/nfs/nfs4proc.c | 7 +++++++ > fs/nfs/pagelist.c | 1 + > 4 files changed, 21 insertions(+), 13 deletions(-) > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > index a490f45df4db..adc67fe762e3 100644 > --- a/fs/nfs/file.c > +++ b/fs/nfs/file.c > @@ -684,7 +684,6 @@ static int > do_unlk(struct file *filp, int cmd, struct file_lock *fl, int > is_local) > { > struct inode *inode = filp->f_mapping->host; > - struct nfs_lock_context *l_ctx; > int status; > > /* > @@ -693,18 +692,6 @@ do_unlk(struct file *filp, int cmd, struct > file_lock *fl, int is_local) > */ > vfs_fsync(filp, 0); > > - l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); > - if (!IS_ERR(l_ctx)) { > - status = nfs_iocounter_wait(l_ctx); > - nfs_put_lock_context(l_ctx); > - if (status < 0) > - return status; > - } > - > - /* NOTE: special case > - * If we're signalled while cleaning up locks on > process exit, we > - * still need to complete the unlock. 
> - */ > /* > * Use local locking if mounted with "-onolock" or with > appropriate > * "-olocal_lock=" > diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c > index dc925b531f32..ec3f12571c82 100644 > --- a/fs/nfs/nfs3proc.c > +++ b/fs/nfs/nfs3proc.c > @@ -869,6 +869,19 @@ static int > nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) > { > struct inode *inode = file_inode(filp); > + int status; > + struct nfs_lock_context *l_ctx; > + > + /* For v3, always send the unlock on FL_CLOSE */ > + if (fl->fl_type == F_UNLCK) { > + l_ctx = > nfs_get_lock_context(nfs_file_open_context(filp)); > + if (!IS_ERR(l_ctx)) { > + status = nfs_iocounter_wait(l_ctx); > + nfs_put_lock_context(l_ctx); > + if (status < 0 && !(fl->fl_flags & > FL_CLOSE)) > + return status; > + } > + } > > return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); > } > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index 91f88bfbbe79..052b97fd653d 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -5906,6 +5906,7 @@ struct nfs4_unlockdata { > struct nfs_locku_res res; > struct nfs4_lock_state *lsp; > struct nfs_open_context *ctx; > + struct nfs_lock_context *l_ctx; > struct file_lock fl; > struct nfs_server *server; > unsigned long timestamp; > @@ -5930,6 +5931,7 @@ static struct nfs4_unlockdata > *nfs4_alloc_unlockdata(struct file_lock *fl, > atomic_inc(&lsp->ls_count); > /* Ensure we don't close file until we're done freeing > locks! 
*/ > p->ctx = get_nfs_open_context(ctx); > + p->l_ctx = nfs_get_lock_context(ctx); > memcpy(&p->fl, fl, sizeof(p->fl)); > p->server = NFS_SERVER(inode); > return p; > @@ -5988,6 +5990,11 @@ static void nfs4_locku_prepare(struct rpc_task > *task, void *data) > /* Note: exit _without_ running nfs4_locku_done */ > goto out_no_action; > } > + > + if (!IS_ERR(calldata->l_ctx)) { > + nfs_iocounter_wait(calldata->l_ctx); > + nfs_put_lock_context(calldata->l_ctx); > + } > calldata->timestamp = jiffies; > if (nfs4_setup_sequence(calldata->server, > &calldata->arg.seq_args, > diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c > index 6e629b856a00..8bf3cefdaa96 100644 > --- a/fs/nfs/pagelist.c > +++ b/fs/nfs/pagelist.c > @@ -114,6 +114,7 @@ nfs_iocounter_wait(struct nfs_lock_context > *l_ctx) > return wait_on_atomic_t(&l_ctx->io_count, > nfs_wait_atomic_killable, > TASK_KILLABLE); > } > +EXPORT_SYMBOL(nfs_iocounter_wait); > > /* > * nfs_page_group_lock - lock the head of the page group -- Trond Myklebust Principal System Architect 4300 El Camino Real | Suite 100 Los Altos, CA 94022 W: 650-422-3800 C: 801-921-4583 www.primarydata.com [corrupted mailing-list footer removed: original bytes were unrecoverable mojibake]