move_to_close_lru() is currently called with ->st_mutex and .rp_mutex held. This can lead to a deadlock as move_to_close_lru() waits for sc_count to drop to 2, and some threads holding a reference might be waiting for either mutex. These references will never be dropped so sc_count will never reach 2. There have been a couple of attempted fixes (see [1] and [2]), but both were problematic for different reasons. This patch attempts to break the impasse by simply not waiting for the rp_mutex. If it's contended then we just have it return NFS4ERR_DELAY. This will likely cause parallel opens by the same openowner to be even slower on NFSv4.0, but it should break the deadlock. One way to address the performance impact might be to allow the wait for the mutex to time out after a period of time (30ms would be the same as NFSD_DELEGRETURN_TIMEOUT). We'd need to add a mutex_lock_timeout function in order for that to work. Chuck also suggested that we may be able to utilize the svc_defer mechanism instead of returning NFS4ERR_DELAY in this situation, but I'm not quite sure how feasible that is. [1]: https://lore.kernel.org/lkml/4dd1fe21e11344e5969bb112e954affb@xxxxxx/T/ [2]: https://lore.kernel.org/linux-nfs/170546328406.23031.11217818844350800811@xxxxxxxxxxxxxxxxxxxxx/ Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> --- fs/nfsd/nfs4state.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index aee12adf0598..4b667eeb06c8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4658,13 +4658,16 @@ static void init_nfs4_replay(struct nfs4_replay *rp) mutex_init(&rp->rp_mutex); } -static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, +static __be32 nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so) { if (!nfsd4_has_session(cstate)) { - mutex_lock(&so->so_replay.rp_mutex); + WARN_ON_ONCE(cstate->replay_owner); + if (!mutex_trylock(&so->so_replay.rp_mutex)) + return nfserr_jukebox; cstate->replay_owner = nfs4_get_stateowner(so); } + return nfs_ok; } void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) @@ -5332,15 +5335,17 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, strhashval = ownerstr_hashval(&open->op_owner); oo = find_openstateowner_str(strhashval, open, clp); open->op_openowner = oo; - if (!oo) { + if (!oo) goto new_owner; - } if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ release_openowner(oo); open->op_openowner = NULL; goto new_owner; } + status = nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); + if (status) + return status; status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; @@ -5350,6 +5355,9 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (oo == NULL) return nfserr_jukebox; open->op_openowner = oo; + status = nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); + if (status) + return status; alloc_stateid: open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) @@ -6121,12 +6129,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { - if (open->op_openowner) { - struct nfs4_stateowner *so = &open->op_openowner->oo_owner; - - nfsd4_cstate_assign_replay(cstate, so); - nfs4_put_stateowner(so); - } + if (open->op_openowner) + nfs4_put_stateowner(&open->op_openowner->oo_owner); if (open->op_file) kmem_cache_free(file_slab, open->op_file); if (open->op_stp) @@ -7193,9 +7197,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, if (status) return status; stp = openlockstateid(s); - nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); - - status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); + status = nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); + if (!status) + status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); if (!status) *stpp = stp; else --- base-commit: 2eb3d14898b97bdc0596d184cbf829b5a81cd639 change-id: 20240229-rp_mutex-d20e3319e315 Best regards, -- Jeff Layton <jlayton@xxxxxxxxxx>