On Tue, May 6, 2014 at 7:40 PM, Bruce Fields <bfields@xxxxxxxxxxxx> wrote: > On Fri, Apr 18, 2014 at 02:44:04PM -0400, Trond Myklebust wrote: >> state_lock is a heavily contended global lock. We don't want to grab >> that while simultaneously holding the inode->i_lock >> Instead do the list manipulations from the work queue context prior to >> starting the rpc call. >> >> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> >> --- >> fs/nfsd/nfs4callback.c | 18 ++++++++++++++++-- >> fs/nfsd/nfs4state.c | 25 +++++++++++++++---------- >> fs/nfsd/state.h | 1 + >> 3 files changed, 32 insertions(+), 12 deletions(-) >> >> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c >> index 39c8ef875f91..8626cd6af4d1 100644 >> --- a/fs/nfsd/nfs4callback.c >> +++ b/fs/nfsd/nfs4callback.c >> @@ -1009,9 +1009,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) >> run_nfsd4_cb(cb); >> } >> >> -static void nfsd4_do_callback_rpc(struct work_struct *w) >> +static void nfsd4_run_callback_rpc(struct nfsd4_callback *cb) >> { >> - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); >> struct nfs4_client *clp = cb->cb_clp; >> struct rpc_clnt *clnt; >> >> @@ -1029,11 +1028,25 @@ static void nfsd4_do_callback_rpc(struct work_struct *w) >> cb->cb_ops, cb); >> } >> >> +static void nfsd4_do_callback_rpc(struct work_struct *w) >> +{ >> + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); >> + nfsd4_run_callback_rpc(cb); >> +} >> + >> void nfsd4_init_callback(struct nfsd4_callback *cb) >> { >> INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); >> } >> >> +static void nfsd4_do_cb_recall(struct work_struct *w) >> +{ >> + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); >> + >> + nfsd4_prepare_cb_recall(cb->cb_op); >> + nfsd4_run_callback_rpc(cb); >> +} >> + >> void nfsd4_cb_recall(struct nfs4_delegation *dp) >> { >> struct nfsd4_callback *cb = &dp->dl_recall; >> @@ -1050,6 +1063,7 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) >> >> INIT_LIST_HEAD(&cb->cb_per_client); >> cb->cb_done = true; >> + INIT_WORK(&cb->cb_work, nfsd4_do_cb_recall); >> >> run_nfsd4_cb(&dp->dl_recall); >> } >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index 6d691aa2bb27..85fcbc9ebd40 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -2743,24 +2743,31 @@ out: >> return ret; >> } >> >> -static void nfsd_break_one_deleg(struct nfs4_delegation *dp) >> +void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) >> { >> struct nfs4_client *clp = dp->dl_stid.sc_client; >> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); >> >> - lockdep_assert_held(&state_lock); >> + /* >> + * We can't do this in nfsd_break_deleg_cb because it is >> + * already holding inode->i_lock >> + */ >> + spin_lock(&state_lock); >> + if (list_empty(&dp->dl_recall_lru)) { >> + dp->dl_time = get_seconds(); >> + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); >> + } >> + spin_unlock(&state_lock); >> +} >> + >> +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) >> +{ >> /* We're assuming the state code never drops its reference >> * without first removing the lease. Since we're in this lease >> * callback (and since the lease code is serialized by the kernel >> * lock) we know the server hasn't removed the lease yet, we know >> * it's safe to take a reference: */ >> atomic_inc(&dp->dl_count); >> - >> - list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); >> - >> - /* Only place dl_time is set; protected by i_lock: */ >> - dp->dl_time = get_seconds(); >> - >> nfsd4_cb_recall(dp); >> } >> >> @@ -2785,11 +2792,9 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) >> */ >> fl->fl_break_time = 0; >> >> - spin_lock(&state_lock); >> fp->fi_had_conflict = true; >> list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) >> nfsd_break_one_deleg(dp); >> - spin_unlock(&state_lock); > > Why do we know it's safe to traverse this list without holding > state_lock? We're in a callback that is already holding the inode->i_lock, and so this is why we need to involve inode->i_lock when hashing/unhashing the delegation. > --b. > >> } >> >> static >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index 1aa22f39fc65..02c5a203c738 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -473,6 +473,7 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp); >> extern int nfsd4_create_callback_queue(void); >> extern void nfsd4_destroy_callback_queue(void); >> extern void nfsd4_shutdown_callback(struct nfs4_client *); >> +extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); >> extern void nfs4_put_delegation(struct nfs4_delegation *dp); >> extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, >> struct nfsd_net *nn); >> -- >> 1.9.0 >> -- Trond Myklebust Linux NFS client maintainer, PrimaryData trond.myklebust@xxxxxxxxxxxxxxx -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html