> On Oct 28, 2022, at 2:57 PM, Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > The list_lru_add and list_lru_del functions use list_empty checks to see > whether the object is already on the LRU. That's fine in most cases, but > we occasionally repurpose nf_lru after unhashing. It's possible for an > LRU removal to remove it from a different list altogether if we lose a > race. I've never seen that happen. lru field re-use is actually used in other places in the kernel. Shouldn't we try to find and fix such races? Wasn't the idea to reduce the complexity of nfsd_file_put ? > Add a new NFSD_FILE_LRU flag, which indicates that the object actually > resides on the LRU and not some other list. Use that when adding and > removing it from the LRU instead of just relying on list_empty checks. > > Add an extra HASHED check after adding the entry to the LRU. If it's now > clear, just remove it from the LRU again and put the reference if that > remove is successful. > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> > --- > fs/nfsd/filecache.c | 44 +++++++++++++++++++++++++++++--------------- > fs/nfsd/filecache.h | 1 + > 2 files changed, 30 insertions(+), 15 deletions(-) > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c > index d928c5e38eeb..47cdc6129a7b 100644 > --- a/fs/nfsd/filecache.c > +++ b/fs/nfsd/filecache.c > @@ -403,18 +403,22 @@ nfsd_file_check_writeback(struct nfsd_file *nf) > static bool nfsd_file_lru_add(struct nfsd_file *nf) > { > set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); > - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { > - trace_nfsd_file_lru_add(nf); > - return true; > + if (!test_and_set_bit(NFSD_FILE_LRU, &nf->nf_flags)) { > + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { > + trace_nfsd_file_lru_add(nf); > + return true; > + } > } > return false; > } > > static bool nfsd_file_lru_remove(struct nfsd_file *nf) > { > - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { > - trace_nfsd_file_lru_del(nf); > - return true; > + if (test_and_clear_bit(NFSD_FILE_LRU, &nf->nf_flags)) { > + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { > + trace_nfsd_file_lru_del(nf); > + return true; > + } > } > return false; > } > @@ -469,20 +473,30 @@ nfsd_file_put(struct nfsd_file *nf) > { > trace_nfsd_file_put(nf); > > - /* > - * The HASHED check is racy. We may end up with the occasional > - * unhashed entry on the LRU, but they should get cleaned up > - * like any other. > - */ > if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && > test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { > /* > - * If this is the last reference (nf_ref == 1), then transfer > - * it to the LRU. If the add to the LRU fails, just put it as > - * usual. > + * If this is the last reference (nf_ref == 1), then try to > + * transfer it to the LRU. > */ > - if (refcount_dec_not_one(&nf->nf_ref) || nfsd_file_lru_add(nf)) > + if (refcount_dec_not_one(&nf->nf_ref)) > return; > + > + /* Try to add it to the LRU. If that fails, decrement. */ > + if (nfsd_file_lru_add(nf)) { > + /* If it's still hashed, we're done */ > + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) > + return; > + > + /* > + * We're racing with unhashing, so try to remove it from > + * the LRU. If removal fails, then someone else already > + * has our reference and we're done. If it succeeds, > + * fall through to decrement. > + */ > + if (!nfsd_file_lru_remove(nf)) > + return; > + } > } > if (refcount_dec_and_test(&nf->nf_ref)) > nfsd_file_free(nf); > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h > index b7efb2c3ddb1..e52ab7d5a44c 100644 > --- a/fs/nfsd/filecache.h > +++ b/fs/nfsd/filecache.h > @@ -39,6 +39,7 @@ struct nfsd_file { > #define NFSD_FILE_PENDING (1) > #define NFSD_FILE_REFERENCED (2) > #define NFSD_FILE_GC (3) > +#define NFSD_FILE_LRU (4) /* file is on LRU */ > unsigned long nf_flags; > struct inode *nf_inode; /* don't deref */ > refcount_t nf_ref; > -- > 2.37.3 > -- Chuck Lever