Hello, I'd like to report a nfsd_file leaking issue and propose a fix for it. When I tested Linux kernel 6.8 and 6.6, I noticed nfsd_file leaks which led to undestroyable file systems (zfs), here are some examples: crash> struct nfsd_file -x ffff88e160db0460 struct nfsd_file { nf_rlist = { rhead = { next = 0xffff8921fa2392f1 }, next = 0x0 }, nf_inode = 0xffff8882bc312ef8, nf_file = 0xffff88e2015b1500, nf_cred = 0xffff88e3ab0e7800, nf_net = 0xffffffff83d41600 <init_net>, nf_flags = 0x8, nf_ref = { refs = { counter = 0xc0000000 } }, nf_may = 0x4, nf_mark = 0xffff88e1bddfb320, nf_lru = { next = 0xffff88e160db04a8, prev = 0xffff88e160db04a8 }, nf_rcu = { next = 0x10000000000, func = 0x0 }, nf_birthtime = 0x73d22fc1728 } crash> struct nfsd_file.nf_flags,nf_ref.refs.counter,nf_lru,nf_file -x ffff88839a53d850 nf_flags = 0x8, nf_ref.refs.counter = 0x0 nf_lru = { next = 0xffff88839a53d898, prev = 0xffff88839a53d898 }, nf_file = 0xffff88810ede8700, crash> struct nfsd_file.nf_flags,nf_ref.refs.counter,nf_lru,nf_file -x ffff88c32b11e850 nf_flags = 0x8, nf_ref.refs.counter = 0x0 nf_lru = { next = 0xffff88c32b11e898, prev = 0xffff88c32b11e898 }, nf_file = 0xffff88c20a701c00, crash> struct nfsd_file.nf_flags,nf_ref.refs.counter,nf_lru,nf_file -x ffff88e372709700 nf_flags = 0xc, nf_ref.refs.counter = 0x0 nf_lru = { next = 0xffff88e372709748, prev = 0xffff88e372709748 }, nf_file = 0xffff88e0725e6400, crash> struct nfsd_file.nf_flags,nf_ref.refs.counter,nf_lru,nf_file -x ffff8982864944d0 nf_flags = 0xc, nf_ref.refs.counter = 0x0 nf_lru = { next = 0xffff898286494518, prev = 0xffff898286494518 }, nf_file = 0xffff89803c0ff700, The leak occurs when nfsd_file_put() races with nfsd_file_cond_queue() or nfsd_file_lru_cb(). With the following patch, I haven't observed any leak after a few days heavy nfs load: diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1a6d5d000b85..2323829f7208 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -389,6 +389,17 @@ nfsd_file_put(struct nfsd_file *nf) if (!nfsd_file_lru_remove(nf)) return; } + /* + * Racing with nfsd_file_cond_queue() or nfsd_file_lru_cb(), + * it's unhashed but then removed from the dispose list, + * so we need to free it. + */ + if (refcount_read(&nf->nf_ref) == 0 && + !test_bit(NFSD_FILE_HASHED, &nf->nf_flags) && + list_empty(&nf->nf_lru)) { + nfsd_file_free(nf); + return; + } } if (refcount_dec_and_test(&nf->nf_ref)) nfsd_file_free(nf); @@ -576,7 +587,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) int decrement = 1; /* If we raced with someone else unhashing, ignore it */ - if (!nfsd_file_unhash(nf)) + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) return; /* If we can't get a reference, ignore it */ @@ -590,6 +601,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) /* If refcount goes to 0, then put on the dispose list */ if (refcount_sub_and_test(decrement, &nf->nf_ref)) { list_add(&nf->nf_lru, dispose); + nfsd_file_unhash(nf); trace_nfsd_file_closing(nf); } } Please kindly review the patch and let me know if it makes sense. Thanks, -Youzhong