On 1/11/23 2:47 AM, Jeff Layton wrote:
On Wed, 2023-01-11 at 02:24 -0800, Dai Ngo wrote:
Currently, nfsd4_state_shrinker_worker can be scheduled multiple times
from nfsd4_state_shrinker_count when memory is low. This causes the
WARN_ON_ONCE in __queue_delayed_work to trigger.
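For reference, the check being tripped is one of the sanity tests at the
top of __queue_delayed_work() (abridged from kernel/workqueue.c; the exact
lines vary by kernel version). The timer_pending()/list_empty() checks are
presumably what a concurrent re-schedule runs into:

static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
                                 struct delayed_work *dwork, unsigned long delay)
{
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;

        WARN_ON_ONCE(!wq);
        WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
        WARN_ON_ONCE(timer_pending(timer));
        WARN_ON_ONCE(!list_empty(&work->entry));
        ...
}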
This patch allows only one instance of nfsd4_state_shrinker_worker to
run at a time, using the nfsd_shrinker_active flag protected by the
client_lock.
Change nfsd_shrinker_work from delayed_work to work_struct since we
don't use the delay.
Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.
Cancel the nfsd_shrinker_work work_struct after unregistering the
shrinker in nfs4_state_shutdown_net.
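One property worth noting about the conversion: for a plain work_struct,
queue_work() test-and-sets WORK_STRUCT_PENDING_BIT and simply returns
false, without re-queueing, if the item is already pending. A minimal
illustration of the call pattern the patch switches to (illustration
only, using the names from the diff below):

        /*
         * Concurrent callers cannot double-queue a plain work_struct:
         * queue_work() returns false if the work is already pending.
         */
        if (count)
                queue_work(laundry_wq, &nn->nfsd_shrinker_work);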
Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
Reported-by: Mike Galbraith <efault@xxxxxx>
Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
---
v2:
. Change nfsd_shrinker_work from delayed_work to work_struct
. Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
. Cancel work_struct nfsd_shrinker_work after unregistering shrinker
v3:
. set nfsd_shrinker_active earlier in nfsd4_state_shrinker_count
fs/nfsd/netns.h | 3 ++-
fs/nfsd/nfs4state.c | 24 +++++++++++++++++++-----
2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..b0c7b657324b 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -195,7 +195,8 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
+ bool nfsd_shrinker_active;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7cfefd7c205..35ec4cba88b3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4407,11 +4407,22 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_client_shrinker);
+ spin_lock(&nn->client_lock);
+ if (nn->nfsd_shrinker_active) {
+ spin_unlock(&nn->client_lock);
+ return 0;
+ }
+ nn->nfsd_shrinker_active = true;
count = atomic_read(&nn->nfsd_courtesy_clients);
if (!count)
count = atomic_long_read(&num_delegations);
- if (count)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ if (count) {
+ spin_unlock(&nn->client_lock);
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
+ } else {
+ nn->nfsd_shrinker_active = false;
+ spin_unlock(&nn->client_lock);
+ }
The change to normal work_struct is an improvement, but NAK on this
patch. The spinlocking and flag are not needed here. I seriously doubt
that we have a clear understanding of this problem.
Agreed. We need to get to the bottom of this.
-Dai
return (unsigned long)count;
}
@@ -6233,12 +6244,14 @@ deleg_reaper(struct nfsd_net *nn)
static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
nfsd_shrinker_work);
courtesy_client_reaper(nn);
deleg_reaper(nn);
+ spin_lock(&nn->client_lock);
+ nn->nfsd_shrinker_active = false;
+ spin_unlock(&nn->client_lock);
}
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
@@ -8064,7 +8077,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);
nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
@@ -8171,6 +8184,7 @@ nfs4_state_shutdown_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
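For readability, the net result of the two nfs4state.c function hunks
above is roughly the following (condensed from the diff, not a verbatim
copy of the tree, and the guard flag itself is what is being questioned
above):

static unsigned long
nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
        int count;
        struct nfsd_net *nn = container_of(shrink,
                        struct nfsd_net, nfsd_client_shrinker);

        spin_lock(&nn->client_lock);
        /* only one instance of the worker at a time */
        if (nn->nfsd_shrinker_active) {
                spin_unlock(&nn->client_lock);
                return 0;
        }
        nn->nfsd_shrinker_active = true;

        count = atomic_read(&nn->nfsd_courtesy_clients);
        if (!count)
                count = atomic_long_read(&num_delegations);
        if (count) {
                spin_unlock(&nn->client_lock);
                queue_work(laundry_wq, &nn->nfsd_shrinker_work);
        } else {
                /* nothing to reap, drop the claim so a later call can queue */
                nn->nfsd_shrinker_active = false;
                spin_unlock(&nn->client_lock);
        }
        return (unsigned long)count;
}

static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
        struct nfsd_net *nn = container_of(work, struct nfsd_net,
                        nfsd_shrinker_work);

        courtesy_client_reaper(nn);
        deleg_reaper(nn);

        spin_lock(&nn->client_lock);
        /* allow the next low-memory pass to queue the worker again */
        nn->nfsd_shrinker_active = false;
        spin_unlock(&nn->client_lock);
}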