On Sat, Aug 31, 2024 at 06:37:36PM -0400, Mike Snitzer wrote: > From: Weston Andros Adamson <dros@xxxxxxxxxxxxxxx> > > Add server support for bypassing NFS for localhost reads, writes, and > commits. This is only useful when both the client and server are > running on the same host. > > If nfsd_open_local_fh() fails then the NFS client will both retry and > fall back to normal network-based read, write and commit operations if > localio is no longer supported. > > Care is taken to ensure the same NFS security mechanisms are used > (authentication, etc) regardless of whether localio or regular NFS > access is used. The auth_domain established as part of the traditional > NFS client access to the NFS server is also used for localio. Store > auth_domain for localio in nfsd_uuid_t and transfer it to the client > if it is local to the server. > > Relative to containers, localio gives the client access to the network > namespace the server has. This is required to allow the client to > access the server's per-namespace nfsd_net struct. > > This commit also introduces the use of NFSD's percpu_ref to interlock > nfsd_destroy_serv and nfsd_open_local_fh, to ensure nn->nfsd_serv is > not destroyed while in use by nfsd_open_local_fh and other LOCALIO > client code. > > CONFIG_NFS_LOCALIO enables NFS server support for LOCALIO. 
> > Signed-off-by: Weston Andros Adamson <dros@xxxxxxxxxxxxxxx> > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > Co-developed-by: Mike Snitzer <snitzer@xxxxxxxxxx> > Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> > Co-developed-by: NeilBrown <neilb@xxxxxxx> > Signed-off-by: NeilBrown <neilb@xxxxxxx> > > Not-Acked-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > Not-Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> > --- > fs/nfsd/Makefile | 1 + > fs/nfsd/filecache.c | 2 +- > fs/nfsd/localio.c | 112 +++++++++++++++++++++++++++++++++++++ > fs/nfsd/netns.h | 4 ++ > fs/nfsd/nfsctl.c | 25 ++++++++- > fs/nfsd/trace.h | 3 +- > fs/nfsd/vfs.h | 2 + > include/linux/nfslocalio.h | 8 +++ > 8 files changed, 154 insertions(+), 3 deletions(-) > create mode 100644 fs/nfsd/localio.c > > diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile > index b8736a82e57c..18cbd3fa7691 100644 > --- a/fs/nfsd/Makefile > +++ b/fs/nfsd/Makefile > @@ -23,3 +23,4 @@ nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o > nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o > nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o > nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o > +nfsd-$(CONFIG_NFS_LOCALIO) += localio.o > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c > index 89ff380ec31e..348c1b97092e 100644 > --- a/fs/nfsd/filecache.c > +++ b/fs/nfsd/filecache.c > @@ -52,7 +52,7 @@ > #define NFSD_FILE_CACHE_UP (0) > > /* We only care about NFSD_MAY_READ/WRITE for this cache */ > -#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) > +#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO) > > static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); > static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); > diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c > new file mode 100644 > index 000000000000..75df709c6903 > --- /dev/null > +++ b/fs/nfsd/localio.c > @@ -0,0 +1,112 @@ > +// SPDX-License-Identifier: 
GPL-2.0-only > +/* > + * NFS server support for local clients to bypass network stack > + * > + * Copyright (C) 2014 Weston Andros Adamson <dros@xxxxxxxxxxxxxxx> > + * Copyright (C) 2019 Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > + * Copyright (C) 2024 Mike Snitzer <snitzer@xxxxxxxxxxxxxxx> > + * Copyright (C) 2024 NeilBrown <neilb@xxxxxxx> > + */ > + > +#include <linux/exportfs.h> > +#include <linux/sunrpc/svcauth.h> > +#include <linux/sunrpc/clnt.h> > +#include <linux/nfs.h> > +#include <linux/nfs_common.h> > +#include <linux/nfslocalio.h> > +#include <linux/string.h> > + > +#include "nfsd.h" > +#include "vfs.h" > +#include "netns.h" > +#include "filecache.h" > + > +static const struct nfsd_localio_operations nfsd_localio_ops = { > + .nfsd_open_local_fh = nfsd_open_local_fh, > + .nfsd_file_put_local = nfsd_file_put_local, > + .nfsd_file_file = nfsd_file_file, > +}; > + > +void nfsd_localio_ops_init(void) > +{ > + memcpy(&nfs_to, &nfsd_localio_ops, sizeof(nfsd_localio_ops)); > +} Same comment as Neil: this should surface a pointer to the localio_ops struct. Copying the whole set of function pointers is generally unnecessary. > + > +/** > + * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file > + * > + * @uuid: nfs_uuid_t which provides the 'struct net' to get the proper nfsd_net > + * and the 'struct auth_domain' required for LOCALIO access > + * @rpc_clnt: rpc_clnt that the client established, used for sockaddr and cred > + * @cred: cred that the client established > + * @nfs_fh: filehandle to lookup > + * @fmode: fmode_t to use for open > + * > + * This function maps a local fh to a path on a local filesystem. > + * This is useful when the nfs client has the local server mounted - it can > + * avoid all the NFS overhead with reads, writes and commits. > + * > + * On successful return, returned nfsd_file will have its nf_net member > + * set. 
Caller (NFS client) is responsible for calling nfsd_serv_put and > + * nfsd_file_put (via nfs_to.nfsd_file_put_local). > + */ > +struct nfsd_file * > +nfsd_open_local_fh(nfs_uuid_t *uuid, > + struct rpc_clnt *rpc_clnt, const struct cred *cred, > + const struct nfs_fh *nfs_fh, const fmode_t fmode) > + __must_hold(rcu) > +{ > + int mayflags = NFSD_MAY_LOCALIO; > + struct nfsd_net *nn = NULL; > + struct net *net; > + struct svc_cred rq_cred; > + struct svc_fh fh; > + struct nfsd_file *localio; > + __be32 beres; > + > + if (nfs_fh->size > NFS4_FHSIZE) > + return ERR_PTR(-EINVAL); > + > + /* > + * Not running in nfsd context, so must safely get reference on nfsd_serv. > + * But the server may already be shutting down, if so disallow new localio. > + * uuid->net is NOT a counted reference, but caller's rcu_read_lock() ensures > + * that if uuid->net is not NULL, then calling nfsd_serv_try_get() is safe > + * and if it succeeds we will have an implied reference to the net. > + */ > + net = rcu_dereference(uuid->net); > + if (net) > + nn = net_generic(net, nfsd_net_id); > + if (unlikely(!nn || !nfsd_serv_try_get(nn))) > + return ERR_PTR(-ENXIO); > + > + /* Drop the rcu lock for nfsd_file_acquire_local() */ > + rcu_read_unlock(); I'm struggling with the locking logistics. Caller takes the RCU read lock, this function drops the lock, then takes it again. So: - A caller might rely on the lock being held continuously, but - The API contract documented above doesn't indicate that this function drops that lock - The __must_hold(rcu) annotation doesn't indicate that this function drops that lock, IIUC Dropping and retaking the lock in here is an anti-pattern that should be avoided. I suggest we are better off in the long run if the caller does not need to take the RCU read lock, but instead, nfsd_open_local_fh takes it right here just for the rcu_dereference. OTOH, why drop the lock before calling nfsd_file_acquire_local()? 
The RCU read lock can safely be taken more than once in succession. Let's rethink the locking strategy. > + > + /* nfs_fh -> svc_fh */ > + fh_init(&fh, NFS4_FHSIZE); > + fh.fh_handle.fh_size = nfs_fh->size; > + memcpy(fh.fh_handle.fh_raw, nfs_fh->data, nfs_fh->size); > + > + if (fmode & FMODE_READ) > + mayflags |= NFSD_MAY_READ; > + if (fmode & FMODE_WRITE) > + mayflags |= NFSD_MAY_WRITE; > + > + svcauth_map_clnt_to_svc_cred_local(rpc_clnt, cred, &rq_cred); > + > + beres = nfsd_file_acquire_local(uuid->net, &rq_cred, uuid->dom, > + &fh, mayflags, &localio); > + if (beres) { > + localio = ERR_PTR(nfs_stat_to_errno(be32_to_cpu(beres))); > + nfsd_serv_put(nn); > + } > + > + fh_put(&fh); > + if (rq_cred.cr_group_info) > + put_group_info(rq_cred.cr_group_info); > + > + rcu_read_lock(); > + return localio; > +} > +EXPORT_SYMBOL_GPL(nfsd_open_local_fh); > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > index e2d953f21dde..0fd31188a951 100644 > --- a/fs/nfsd/netns.h > +++ b/fs/nfsd/netns.h > @@ -216,6 +216,10 @@ struct nfsd_net { > /* last time an admin-revoke happened for NFSv4.0 */ > time64_t nfs40_last_revoke; > > +#if IS_ENABLED(CONFIG_NFS_LOCALIO) > + /* Local clients to be invalidated when net is shut down */ > + struct list_head local_clients; > +#endif > }; > > /* Simple check to find out if a given net was properly initialized */ > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 64c1b4d649bc..3adbc05ebaac 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -18,6 +18,7 @@ > #include <linux/sunrpc/svc.h> > #include <linux/module.h> > #include <linux/fsnotify.h> > +#include <linux/nfslocalio.h> > > #include "idmap.h" > #include "nfsd.h" > @@ -2257,7 +2258,9 @@ static __net_init int nfsd_net_init(struct net *net) > get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); > seqlock_init(&nn->writeverf_lock); > nfsd_proc_stat_init(net); > - > +#if IS_ENABLED(CONFIG_NFS_LOCALIO) > + INIT_LIST_HEAD(&nn->local_clients); > +#endif > return 0; > > 
out_repcache_error: > @@ -2268,6 +2271,22 @@ static __net_init int nfsd_net_init(struct net *net) > return retval; > } > > +#if IS_ENABLED(CONFIG_NFS_LOCALIO) > +/** > + * nfsd_net_pre_exit - Disconnect localio clients from net namespace > + * @net: a network namespace that is about to be destroyed > + * > + * This invalidates ->net pointers held by localio clients > + * while they can still safely access nn->counter. > + */ > +static __net_exit void nfsd_net_pre_exit(struct net *net) > +{ > + struct nfsd_net *nn = net_generic(net, nfsd_net_id); > + > + nfs_uuid_invalidate_clients(&nn->local_clients); > +} > +#endif > + > /** > * nfsd_net_exit - Release the nfsd_net portion of a net namespace > * @net: a network namespace that is about to be destroyed > @@ -2285,6 +2304,9 @@ static __net_exit void nfsd_net_exit(struct net *net) > > static struct pernet_operations nfsd_net_ops = { > .init = nfsd_net_init, > +#if IS_ENABLED(CONFIG_NFS_LOCALIO) > + .pre_exit = nfsd_net_pre_exit, > +#endif > .exit = nfsd_net_exit, > .id = &nfsd_net_id, > .size = sizeof(struct nfsd_net), > @@ -2322,6 +2344,7 @@ static int __init init_nfsd(void) > retval = genl_register_family(&nfsd_nl_family); > if (retval) > goto out_free_all; > + nfsd_localio_ops_init(); > > return 0; > out_free_all: > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h > index d22027e23761..82bcefcd1f21 100644 > --- a/fs/nfsd/trace.h > +++ b/fs/nfsd/trace.h > @@ -86,7 +86,8 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); > { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ > { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ > { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ > - { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) > + { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }, \ > + { NFSD_MAY_LOCALIO, "LOCALIO" }) > > TRACE_EVENT(nfsd_compound, > TP_PROTO( > diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h > index 01947561d375..3ff146522556 100644 > --- a/fs/nfsd/vfs.h > +++ b/fs/nfsd/vfs.h > @@ -33,6 +33,8 @@ > > #define NFSD_MAY_64BIT_COOKIE 0x1000 /* 
64 bit readdir cookies for >= NFSv3 */ > > +#define NFSD_MAY_LOCALIO 0x2000 /* for tracing, reflects when localio used */ > + > #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) > #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) > > diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h > index 62419c4bc8f1..61f2c781dd50 100644 > --- a/include/linux/nfslocalio.h > +++ b/include/linux/nfslocalio.h > @@ -6,6 +6,8 @@ > #ifndef __LINUX_NFSLOCALIO_H > #define __LINUX_NFSLOCALIO_H > > +#if IS_ENABLED(CONFIG_NFS_LOCALIO) > + > #include <linux/module.h> > #include <linux/list.h> > #include <linux/uuid.h> > @@ -63,4 +65,10 @@ struct nfsd_localio_operations { > extern void nfsd_localio_ops_init(void); > extern struct nfsd_localio_operations nfs_to; > > +#else /* CONFIG_NFS_LOCALIO */ > +static inline void nfsd_localio_ops_init(void) > +{ > +} > +#endif /* CONFIG_NFS_LOCALIO */ > + > #endif /* __LINUX_NFSLOCALIO_H */ > -- > 2.44.0 > -- Chuck Lever