On Jun 14, 2016, at 11:38 AM, J . Bruce Fields wrote: > On Sun, Jun 12, 2016 at 09:26:27PM -0400, Oleg Drokin wrote: >> It used to be the case that state had an rwlock that was locked for write >> by downgrades, but for read for upgrades (opens). Well, the problem is >> if there are two competing opens for the same state, they step on >> each other toes potentially leading to leaking file descriptors >> from the state structure, since access mode is a bitmap only set once. >> >> Extend the holding region around in nfsd4_process_open2() to avoid >> racing entry into nfs4_get_vfs_file(). >> Make init_open_stateid() return with locked stateid to be unlocked >> by the caller. >> >> Now this version held up pretty well in my testing for 24 hours. >> It still does not address the situation if during one of the racing >> nfs4_get_vfs_file() calls we are getting an error from one (first?) >> of them. This is to be addressed in a separate patch after having a >> solid reproducer (potentially using some fault injection). >> >> Signed-off-by: Oleg Drokin <green@xxxxxxxxxxxxxx> >> --- >> fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++-------------------- >> fs/nfsd/state.h | 2 +- >> 2 files changed, 28 insertions(+), 21 deletions(-) >> >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index f5f82e1..fa5fb5a 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -3487,6 +3487,10 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, >> struct nfs4_openowner *oo = open->op_openowner; >> struct nfs4_ol_stateid *retstp = NULL; >> >> + /* We are moving these outside of the spinlocks to avoid the warnings */ >> + mutex_init(&stp->st_mutex); >> + mutex_lock(&stp->st_mutex); >> + >> spin_lock(&oo->oo_owner.so_client->cl_lock); >> spin_lock(&fp->fi_lock); >> >> @@ -3502,13 +3506,14 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, >> stp->st_access_bmap = 0; >> stp->st_deny_bmap = 0; >> stp->st_openstp = NULL; >> - init_rwsem(&stp->st_rwsem); >> list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); >> list_add(&stp->st_perfile, &fp->fi_stateids); >> >> out_unlock: >> spin_unlock(&fp->fi_lock); >> spin_unlock(&oo->oo_owner.so_client->cl_lock); >> + if (retstp) >> + mutex_lock(&retstp->st_mutex); >> return retstp; > > You're returning with both stp->st_mutex and retstp->st_mutex locked. > Did you mean to drop that first lock in the (retstp) case, or am I > missing something? Well, I think it's ok (perhaps worthy of a comment) it's that if we matched a different retstp state, then stp is not used and either released right away or even if reused, it would be reinitialized in another call to init_open_stateid(), so it's fine? > > --b. > >> } >> >> @@ -4335,32 +4340,34 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf >> */ >> if (stp) { >> /* Stateid was found, this is an OPEN upgrade */ >> - down_read(&stp->st_rwsem); >> + mutex_lock(&stp->st_mutex); >> status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); >> if (status) { >> - up_read(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> goto out; >> } >> } else { >> stp = open->op_stp; >> open->op_stp = NULL; >> + /* >> + * init_open_stateid() either returns a locked stateid >> + * it found, or initializes and locks the new one we passed in >> + */ >> swapstp = init_open_stateid(stp, fp, open); >> if (swapstp) { >> nfs4_put_stid(&stp->st_stid); >> stp = swapstp; >> - down_read(&stp->st_rwsem); >> status = nfs4_upgrade_open(rqstp, fp, current_fh, >> stp, open); >> if (status) { >> - up_read(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> goto out; >> } >> goto upgrade_out; >> } >> - down_read(&stp->st_rwsem); >> status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); >> if (status) { >> - up_read(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> release_open_stateid(stp); >> goto out; >> } >> @@ -4372,7 +4379,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf >> } >> upgrade_out: >> nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); >> - up_read(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> >> if (nfsd4_has_session(&resp->cstate)) { >> if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { >> @@ -4977,12 +4984,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ >> * revoked delegations are kept only for free_stateid. >> */ >> return nfserr_bad_stateid; >> - down_write(&stp->st_rwsem); >> + mutex_lock(&stp->st_mutex); >> status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); >> if (status == nfs_ok) >> status = nfs4_check_fh(current_fh, &stp->st_stid); >> if (status != nfs_ok) >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> return status; >> } >> >> @@ -5030,7 +5037,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs >> return status; >> oo = openowner(stp->st_stateowner); >> if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> nfs4_put_stid(&stp->st_stid); >> return nfserr_bad_stateid; >> } >> @@ -5062,12 +5069,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> oo = openowner(stp->st_stateowner); >> status = nfserr_bad_stateid; >> if (oo->oo_flags & NFS4_OO_CONFIRMED) { >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> goto put_stateid; >> } >> oo->oo_flags |= NFS4_OO_CONFIRMED; >> nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", >> __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); >> >> @@ -5143,7 +5150,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, >> nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); >> status = nfs_ok; >> put_stateid: >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> nfs4_put_stid(&stp->st_stid); >> out: >> nfsd4_bump_seqid(cstate, status); >> @@ -5196,7 +5203,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> if (status) >> goto out; >> nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> >> nfsd4_close_open_stateid(stp); >> >> @@ -5422,7 +5429,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, >> stp->st_access_bmap = 0; >> stp->st_deny_bmap = open_stp->st_deny_bmap; >> stp->st_openstp = open_stp; >> - init_rwsem(&stp->st_rwsem); >> + mutex_init(&stp->st_mutex); >> list_add(&stp->st_locks, &open_stp->st_locks); >> list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); >> spin_lock(&fp->fi_lock); >> @@ -5591,7 +5598,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> &open_stp, nn); >> if (status) >> goto out; >> - up_write(&open_stp->st_rwsem); >> + mutex_unlock(&open_stp->st_mutex); >> open_sop = openowner(open_stp->st_stateowner); >> status = nfserr_bad_stateid; >> if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, >> @@ -5600,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> status = lookup_or_create_lock_state(cstate, open_stp, lock, >> &lock_stp, &new); >> if (status == nfs_ok) >> - down_write(&lock_stp->st_rwsem); >> + mutex_lock(&lock_stp->st_mutex); >> } else { >> status = nfs4_preprocess_seqid_op(cstate, >> lock->lk_old_lock_seqid, >> @@ -5704,7 +5711,7 @@ out: >> seqid_mutating_err(ntohl(status))) >> lock_sop->lo_owner.so_seqid++; >> >> - up_write(&lock_stp->st_rwsem); >> + mutex_unlock(&lock_stp->st_mutex); >> >> /* >> * If this is a new, never-before-used stateid, and we are >> @@ -5874,7 +5881,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> fput: >> fput(filp); >> put_stateid: >> - up_write(&stp->st_rwsem); >> + mutex_unlock(&stp->st_mutex); >> nfs4_put_stid(&stp->st_stid); >> out: >> nfsd4_bump_seqid(cstate, status); >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index 986e51e..64053ea 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -535,7 +535,7 @@ struct nfs4_ol_stateid { >> unsigned char st_access_bmap; >> unsigned char st_deny_bmap; >> struct nfs4_ol_stateid *st_openstp; >> - struct rw_semaphore st_rwsem; >> + struct mutex st_mutex; >> }; >> >> static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) >> -- >> 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html