On Tue, Aug 02, 2016 at 02:15:29PM -0400, Jeff Layton wrote: > Create a new per-lockowner+per-inode structure that contains a > file_lock. Have nfsd4_lock add this structure to the lockowner's list > prior to setting the lock. Then call the vfs and request a blocking lock > (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED > back, then we dequeue the block structure and free it. When the next > lock request comes in, we'll look for an existing block for the same > filehandle and dequeue and reuse it if there is one. I'm probably just missing it because I only skimmed the patch quickly, but I don't see you distinguishing the blocking and non-blocking case. I think we only want to do this in the {READ,WRITE}W_LT case, as {READ,WRITE}_LT is a hint from the client that it's not going to wait for the lock. Sending a notify without a preceding blocking request is probably a (relatively harmless) protocol bug. --b. > > When the lock comes free (a'la an lm_notify call), we dequeue it > from the lockowner's list and kick off a CB_NOTIFY_LOCK callback to > inform the client that it should retry the lock request. > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> > --- > fs/nfsd/nfs4state.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++----- > fs/nfsd/state.h | 12 +++-- > 2 files changed, 147 insertions(+), 17 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 70d0b9b33031..38367201c14f 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab; > static void free_session(struct nfsd4_session *); > > static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; > +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; > > static bool is_session_dead(struct nfsd4_session *ses) > { > @@ -210,6 +211,84 @@ static void nfsd4_put_session(struct nfsd4_session *ses) > spin_unlock(&nn->client_lock); > } > > +static struct nfsd4_blocked_lock * > +find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, > + struct nfsd_net *nn) > +{ > + struct nfsd4_blocked_lock *cur, *found = NULL; > + > + spin_lock(&nn->client_lock); > + list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { > + if (fh_match(fh, &cur->nbl_fh)) { > + list_del_init(&cur->nbl_list); > + found = cur; > + break; > + } > + } > + spin_unlock(&nn->client_lock); > + if (found) > + posix_unblock_lock(&found->nbl_lock); > + return found; > +} > + > +static struct nfsd4_blocked_lock * > +find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, > + struct nfsd_net *nn) > +{ > + struct nfsd4_blocked_lock *nbl; > + > + nbl = find_blocked_lock(lo, fh, nn); > + if (!nbl) { > + nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); > + if (nbl) { > + fh_copy_shallow(&nbl->nbl_fh, fh); > + locks_init_lock(&nbl->nbl_lock); > + nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, > + &nfsd4_cb_notify_lock_ops, > + NFSPROC4_CLNT_CB_NOTIFY_LOCK); > + } > + } > + return nbl; > +} > + > +static void > +free_blocked_lock(struct nfsd4_blocked_lock *nbl) > +{ > + locks_release_private(&nbl->nbl_lock); > + kfree(nbl); > +} > + > +static int > +nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) > +{ > + /* > + * Since this is just an optimization, we don't try very hard if it > + * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and > + * just quit trying on anything else. > + */ > + switch (task->tk_status) { > + case -NFS4ERR_DELAY: > + rpc_delay(task, 1 * HZ); > + return 0; > + default: > + return 1; > + } > +} > + > +static void > +nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) > +{ > + struct nfsd4_blocked_lock *nbl = container_of(cb, > + struct nfsd4_blocked_lock, nbl_cb); > + > + free_blocked_lock(nbl); > +} > + > +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { > + .done = nfsd4_cb_notify_lock_done, > + .release = nfsd4_cb_notify_lock_release, > +}; > + > static inline struct nfs4_stateowner * > nfs4_get_stateowner(struct nfs4_stateowner *sop) > { > @@ -5296,7 +5375,29 @@ nfsd4_fl_put_owner(fl_owner_t owner) > nfs4_put_stateowner(&lo->lo_owner); > } > > +static void > +nfsd4_lm_notify(struct file_lock *fl) > +{ > + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; > + struct net *net = lo->lo_owner.so_client->net; > + struct nfsd_net *nn = net_generic(net, nfsd_net_id); > + struct nfsd4_blocked_lock *nbl = container_of(fl, > + struct nfsd4_blocked_lock, nbl_lock); > + bool queue = false; > + > + spin_lock(&nn->client_lock); > + if (!list_empty(&nbl->nbl_list)) { > + list_del_init(&nbl->nbl_list); > + queue = true; > + } > + spin_unlock(&nn->client_lock); > + > + if (queue) > + nfsd4_run_cb(&nbl->nbl_cb); > +} > + > static const struct lock_manager_operations nfsd_posix_mng_ops = { > + .lm_notify = nfsd4_lm_notify, > .lm_get_owner = nfsd4_fl_get_owner, > .lm_put_owner = nfsd4_fl_put_owner, > }; > @@ -5394,6 +5495,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, > lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); > if (!lo) > return NULL; > + INIT_LIST_HEAD(&lo->lo_blocked); > INIT_LIST_HEAD(&lo->lo_owner.so_stateids); > lo->lo_owner.so_is_open_owner = 0; > lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; > @@ -5558,12 +5660,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > struct nfs4_ol_stateid *open_stp = NULL; > struct nfs4_file *fp; > struct file *filp = NULL; > + struct nfsd4_blocked_lock *nbl = NULL; > struct file_lock *file_lock = NULL; > struct file_lock *conflock = NULL; > __be32 status = 0; > int lkflg; > int err; > bool new = false; > + unsigned char fl_type; > struct net *net = SVC_NET(rqstp); > struct nfsd_net *nn = net_generic(net, nfsd_net_id); > > @@ -5630,13 +5734,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > if (!locks_in_grace(net) && lock->lk_reclaim) > goto out; > > - file_lock = locks_alloc_lock(); > - if (!file_lock) { > - dprintk("NFSD: %s: unable to allocate lock!\n", __func__); > - status = nfserr_jukebox; > - goto out; > - } > - > fp = lock_stp->st_stid.sc_file; > switch (lock->lk_type) { > case NFS4_READ_LT: > @@ -5646,7 +5743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > if (filp) > get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); > spin_unlock(&fp->fi_lock); > - file_lock->fl_type = F_RDLCK; > + fl_type = F_RDLCK; > break; > case NFS4_WRITE_LT: > case NFS4_WRITEW_LT: > @@ -5655,17 +5752,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > if (filp) > get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); > spin_unlock(&fp->fi_lock); > - file_lock->fl_type = F_WRLCK; > + fl_type = F_WRLCK; > break; > default: > status = nfserr_inval; > goto out; > } > + > if (!filp) { > status = nfserr_openmode; > goto out; > } > > + nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); > + if (!nbl) { > + dprintk("NFSD: %s: unable to allocate block!\n", __func__); > + status = nfserr_jukebox; > + goto out; > + } > + > + file_lock = &nbl->nbl_lock; > + file_lock->fl_type = fl_type; > file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); > file_lock->fl_pid = current->tgid; > file_lock->fl_file = filp; > @@ -5682,18 +5789,28 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > goto out; > } > > + if (nfsd4_has_session(cstate)) { > + file_lock->fl_flags |= FL_SLEEP; > + spin_lock(&nn->client_lock); > + list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); > + spin_unlock(&nn->client_lock); > + } > + > err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); > - switch (-err) { > + switch (err) { > case 0: /* success! */ > nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); > status = 0; > break; > - case (EAGAIN): /* conflock holds conflicting lock */ > + case FILE_LOCK_DEFERRED: > + nbl = NULL; > + /* Fallthrough */ > + case EAGAIN: /* conflock holds conflicting lock */ > status = nfserr_denied; > dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); > nfs4_set_lock_denied(conflock, &lock->lk_denied); > break; > - case (EDEADLK): > + case EDEADLK: > status = nfserr_deadlock; > break; > default: > @@ -5702,6 +5819,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > break; > } > out: > + if (nbl) { > + /* dequeue it if we queued it before */ > + if (nfsd4_has_session(cstate)) { > + spin_lock(&nn->client_lock); > + list_del_init(&nbl->nbl_list); > + spin_unlock(&nn->client_lock); > + } > + free_blocked_lock(nbl); > + } > if (filp) > fput(filp); > if (lock_stp) { > @@ -5725,8 +5851,6 @@ out: > if (open_stp) > nfs4_put_stid(&open_stp->st_stid); > nfsd4_bump_seqid(cstate, status); > - if (file_lock) > - locks_free_lock(file_lock); > if (conflock) > locks_free_lock(conflock); > return status; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ecf3f4671654..056b0e4c485b 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -440,11 +440,11 @@ struct nfs4_openowner { > /* > * Represents a generic "lockowner". Similar to an openowner. References to it > * are held by the lock stateids that are created on its behalf. This object is > - * a superset of the nfs4_stateowner struct (or would be if it needed any extra > - * fields). > + * a superset of the nfs4_stateowner struct. > */ > struct nfs4_lockowner { > - struct nfs4_stateowner lo_owner; /* must be first element */ > + struct nfs4_stateowner lo_owner; /* must be first element */ > + struct list_head lo_blocked; /* blocked file_locks */ > }; > > static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) > @@ -580,7 +580,13 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) > return (s32)(a->si_generation - b->si_generation) > 0; > } > > +/* > + * When a client tries to get a lock on a file, we set one of these objects > + * on the blocking lock. When the lock becomes free, we can then issue a > + * CB_NOTIFY_LOCK to the server. > + */ > struct nfsd4_blocked_lock { > + struct list_head nbl_list; > struct file_lock nbl_lock; > struct knfsd_fh nbl_fh; > struct nfsd4_callback nbl_cb; > -- > 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html