This tracking enables __nfsd_file_cache_purge() to call nfs_localio_invalidate_clients(), upon shutdown or export change, to nfs_close_local_fh() all open nfsd_files that are still cached by the LOCALIO nfs clients associated with nfsd_net that is being shutdown. Now that the client must track all open nfsd_files there was more work than necessary being done with the global nfs_uuids_lock contended. This manifested in various RCU issues, e.g.: hrtimer: interrupt took 47969440 ns rcu: INFO: rcu_sched detected stalls on CPUs/tasks: Use nfs_uuid->lock to protect all nfs_uuid_t members, instead of nfs_uuids_lock, once nfs_uuid_is_local() adds the client to nn->local_clients. Also add 'local_clients_lock' to 'struct nfsd_net' to protect nn->local_clients. And store a pointer to spinlock in the 'list_lock' member of nfs_uuid_t so nfs_localio_disable_client() can use it to avoid taking the global nfs_uuids_lock. In combination, these split out locks eliminate the use of the single nfslocalio.c global nfs_uuids_lock in the IO paths (open and close). Also refactored associated fs/nfs_common/nfslocalio.c methods' locking to reduce work performed with spinlocks held in general. Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> --- fs/nfs_common/nfslocalio.c | 166 +++++++++++++++++++++++++++---------- fs/nfsd/filecache.c | 9 ++ fs/nfsd/localio.c | 1 + fs/nfsd/netns.h | 1 + fs/nfsd/nfsctl.c | 4 +- include/linux/nfslocalio.h | 8 +- 6 files changed, 143 insertions(+), 46 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index e58b5b4b4c3a..c8d18f671bcb 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -23,27 +23,49 @@ static DEFINE_SPINLOCK(nfs_uuids_lock); */ static LIST_HEAD(nfs_uuids); +/* + * Lock ordering: + * 1: nfs_uuid->lock + * 2: nfs_uuids_lock + * 3: nfs_uuid->list_lock (aka nn->local_clients_lock) + * + * May skip locks in select cases, but never hold multiple + * locks out of order. + */ + void nfs_uuid_init(nfs_uuid_t *nfs_uuid) { nfs_uuid->net = NULL; nfs_uuid->dom = NULL; + nfs_uuid->list_lock = NULL; INIT_LIST_HEAD(&nfs_uuid->list); + INIT_LIST_HEAD(&nfs_uuid->files); spin_lock_init(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_uuid_init); bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid) { + spin_lock(&nfs_uuid->lock); + if (nfs_uuid->net) { + /* This nfs_uuid is already in use */ + spin_unlock(&nfs_uuid->lock); + return false; + } + spin_lock(&nfs_uuids_lock); - /* Is this nfs_uuid already in use? */ if (!list_empty(&nfs_uuid->list)) { + /* This nfs_uuid is already in use */ spin_unlock(&nfs_uuids_lock); + spin_unlock(&nfs_uuid->lock); return false; } - uuid_gen(&nfs_uuid->uuid); list_add_tail(&nfs_uuid->list, &nfs_uuids); spin_unlock(&nfs_uuids_lock); + uuid_gen(&nfs_uuid->uuid); + spin_unlock(&nfs_uuid->lock); + return true; } EXPORT_SYMBOL_GPL(nfs_uuid_begin); @@ -51,11 +73,15 @@ EXPORT_SYMBOL_GPL(nfs_uuid_begin); void nfs_uuid_end(nfs_uuid_t *nfs_uuid) { if (nfs_uuid->net == NULL) { - spin_lock(&nfs_uuids_lock); - if (nfs_uuid->net == NULL) + spin_lock(&nfs_uuid->lock); + if (nfs_uuid->net == NULL) { + /* Not local, remove from nfs_uuids */ + spin_lock(&nfs_uuids_lock); list_del_init(&nfs_uuid->list); - spin_unlock(&nfs_uuids_lock); - } + spin_unlock(&nfs_uuids_lock); + } + spin_unlock(&nfs_uuid->lock); + } } EXPORT_SYMBOL_GPL(nfs_uuid_end); @@ -73,28 +99,39 @@ static nfs_uuid_t * nfs_uuid_lookup_locked(const uuid_t *uuid) static struct module *nfsd_mod; void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, - struct net *net, struct auth_domain *dom, - struct module *mod) + spinlock_t *list_lock, struct net *net, + struct auth_domain *dom, struct module *mod) { nfs_uuid_t *nfs_uuid; spin_lock(&nfs_uuids_lock); nfs_uuid = nfs_uuid_lookup_locked(uuid); - if (nfs_uuid) { - kref_get(&dom->ref); - nfs_uuid->dom = dom; - /* - * We don't hold a ref on the net, but instead put - * ourselves on a list so the net pointer can be - * invalidated. - */ - list_move(&nfs_uuid->list, list); - rcu_assign_pointer(nfs_uuid->net, net); - - __module_get(mod); - nfsd_mod = mod; + if (!nfs_uuid) { + spin_unlock(&nfs_uuids_lock); + return; } + + /* + * We don't hold a ref on the net, but instead put + * ourselves on @list (nn->local_clients) so the net + * pointer can be invalidated. + */ + spin_lock(list_lock); /* list_lock is nn->local_clients_lock */ + list_move(&nfs_uuid->list, list); + spin_unlock(list_lock); + spin_unlock(&nfs_uuids_lock); + /* Once nfs_uuid is parented to @list, avoid global nfs_uuids_lock */ + spin_lock(&nfs_uuid->lock); + + __module_get(mod); + nfsd_mod = mod; + + nfs_uuid->list_lock = list_lock; + kref_get(&dom->ref); + nfs_uuid->dom = dom; + rcu_assign_pointer(nfs_uuid->net, net); + spin_unlock(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_uuid_is_local); @@ -108,55 +145,96 @@ void nfs_localio_enable_client(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs_localio_enable_client); -static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid) +/* + * Cleanup the nfs_uuid_t embedded in an nfs_client. + * This is the long-form of nfs_uuid_init(). + */ +static void nfs_uuid_put(nfs_uuid_t *nfs_uuid) { - if (!nfs_uuid->net) + LIST_HEAD(local_files); + struct nfs_file_localio *nfl, *tmp; + + spin_lock(&nfs_uuid->lock); + if (unlikely(!nfs_uuid->net)) { + spin_unlock(&nfs_uuid->lock); return; - module_put(nfsd_mod); + } rcu_assign_pointer(nfs_uuid->net, NULL); if (nfs_uuid->dom) { auth_domain_put(nfs_uuid->dom); nfs_uuid->dom = NULL; } - list_del_init(&nfs_uuid->list); + + list_splice_init(&nfs_uuid->files, &local_files); + spin_unlock(&nfs_uuid->lock); + + /* Walk list of files and ensure their last references dropped */ + list_for_each_entry_safe(nfl, tmp, &local_files, list) { + nfs_close_local_fh(nfl); + cond_resched(); + } + + spin_lock(&nfs_uuid->lock); + BUG_ON(!list_empty(&nfs_uuid->files)); + + /* Remove client from nn->local_clients */ + if (nfs_uuid->list_lock) { + spin_lock(nfs_uuid->list_lock); + BUG_ON(list_empty(&nfs_uuid->list)); + list_del_init(&nfs_uuid->list); + spin_unlock(nfs_uuid->list_lock); + nfs_uuid->list_lock = NULL; + } + + module_put(nfsd_mod); + spin_unlock(&nfs_uuid->lock); } void nfs_localio_disable_client(struct nfs_client *clp) { - nfs_uuid_t *nfs_uuid = &clp->cl_uuid; + nfs_uuid_t *nfs_uuid = NULL; - spin_lock(&nfs_uuid->lock); + spin_lock(&clp->cl_uuid.lock); /* aka &nfs_uuid->lock */ if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { - spin_lock(&nfs_uuids_lock); - nfs_uuid_put_locked(nfs_uuid); - spin_unlock(&nfs_uuids_lock); + /* &clp->cl_uuid is always not NULL, using as bool here */ + nfs_uuid = &clp->cl_uuid; } - spin_unlock(&nfs_uuid->lock); + spin_unlock(&clp->cl_uuid.lock); + + if (nfs_uuid) + nfs_uuid_put(nfs_uuid); } EXPORT_SYMBOL_GPL(nfs_localio_disable_client); -void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list) +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock) { + LIST_HEAD(local_clients); nfs_uuid_t *nfs_uuid, *tmp; + struct nfs_client *clp; - spin_lock(&nfs_uuids_lock); - list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) { - struct nfs_client *clp = - container_of(nfs_uuid, struct nfs_client, cl_uuid); - + spin_lock(nn_local_clients_lock); + list_splice_init(nn_local_clients, &local_clients); + spin_unlock(nn_local_clients_lock); + list_for_each_entry_safe(nfs_uuid, tmp, &local_clients, list) { + if (WARN_ON(nfs_uuid->list_lock != nn_local_clients_lock)) + break; + clp = container_of(nfs_uuid, struct nfs_client, cl_uuid); nfs_localio_disable_client(clp); } - spin_unlock(&nfs_uuids_lock); } EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) { - spin_lock(&nfs_uuids_lock); - if (!nfl->nfs_uuid) + /* Add nfl to nfs_uuid->files if it isn't already */ + spin_lock(&nfs_uuid->lock); + if (list_empty(&nfl->list)) { rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid); - spin_unlock(&nfs_uuids_lock); + list_add_tail(&nfl->list, &nfs_uuid->files); + } + spin_unlock(&nfs_uuid->lock); } /* @@ -217,14 +295,16 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) ro_nf = rcu_access_pointer(nfl->ro_file); rw_nf = rcu_access_pointer(nfl->rw_file); if (ro_nf || rw_nf) { - spin_lock(&nfs_uuids_lock); + spin_lock(&nfs_uuid->lock); if (ro_nf) ro_nf = rcu_dereference_protected(xchg(&nfl->ro_file, NULL), 1); if (rw_nf) rw_nf = rcu_dereference_protected(xchg(&nfl->rw_file, NULL), 1); + /* Remove nfl from nfs_uuid->files list */ rcu_assign_pointer(nfl->nfs_uuid, NULL); - spin_unlock(&nfs_uuids_lock); + list_del_init(&nfl->list); + spin_unlock(&nfs_uuid->lock); rcu_read_unlock(); if (ro_nf) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ab9942e42054..c9ab64e3732c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -39,6 +39,7 @@ #include <linux/fsnotify.h> #include <linux/seq_file.h> #include <linux/rhashtable.h> +#include <linux/nfslocalio.h> #include "vfs.h" #include "nfsd.h" @@ -836,6 +837,14 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + if (net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); + } +#endif + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 2ae07161b919..238647fa379e 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -116,6 +116,7 @@ static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs_uuid_is_local(&argp->uuid, &nn->local_clients, + &nn->local_clients_lock, net, rqstp->rq_client, THIS_MODULE); return rpc_success; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8faef59d7122..187c4140b191 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -220,6 +220,7 @@ struct nfsd_net { #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* Local clients to be invalidated when net is shut down */ + spinlock_t local_clients_lock; struct list_head local_clients; #endif }; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 727904d8a4d0..70347b0ecdc4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2259,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net *net) seqlock_init(&nn->writeverf_lock); nfsd_proc_stat_init(net); #if IS_ENABLED(CONFIG_NFS_LOCALIO) + spin_lock_init(&nn->local_clients_lock); INIT_LIST_HEAD(&nn->local_clients); #endif return 0; @@ -2283,7 +2284,8 @@ static __net_exit void nfsd_net_pre_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfs_localio_invalidate_clients(&nn->local_clients); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); } #endif diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index aa2b5c6561ab..c68a529230c1 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -30,19 +30,23 @@ typedef struct { /* sadly this struct is just over a cacheline, avoid bouncing */ spinlock_t ____cacheline_aligned lock; struct list_head list; + spinlock_t *list_lock; /* nn->local_clients_lock */ struct net __rcu *net; /* nfsd's network namespace */ struct auth_domain *dom; /* auth_domain for localio */ + /* Local files to close when net is shut down or exports change */ + struct list_head files; } nfs_uuid_t; void nfs_uuid_init(nfs_uuid_t *); bool nfs_uuid_begin(nfs_uuid_t *); void nfs_uuid_end(nfs_uuid_t *); -void nfs_uuid_is_local(const uuid_t *, struct list_head *, +void nfs_uuid_is_local(const uuid_t *, struct list_head *, spinlock_t *, struct net *, struct auth_domain *, struct module *); void nfs_localio_enable_client(struct nfs_client *clp); void nfs_localio_disable_client(struct nfs_client *clp); -void nfs_localio_invalidate_clients(struct list_head *list); +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ extern struct nfsd_file * -- 2.44.0