From: Weston Andros Adamson <dros@xxxxxxxxxxxxxxx> Add server support for bypassing NFS for localhost reads, writes, and commits. This is only useful when both the client and server are running on the same host. If nfsd_open_local_fh() fails then the NFS client will both retry and fallback to normal network-based read, write and commit operations if localio is no longer supported. Care is taken to ensure the same NFS security mechanisms are used (authentication, etc) regardless of whether localio or regular NFS access is used. The auth_domain established as part of the traditional NFS client access to the NFS server is also used for localio. Store auth_domain for localio in nfsd_uuid_t and transfer it to the client if it is local to the server. Relative to containers, localio gives the client access to the network namespace the server has. This is required to allow the client to access the server's per-namespace nfsd_net struct. CONFIG_NFSD_LOCALIO controls the server enablement for localio. A later commit will add CONFIG_NFS_LOCALIO to allow the client enablement. This commit also introduces the use of nfsd's percpu_ref to interlock nfsd_destroy_serv and nfsd_open_local_fh, to ensure nn->nfsd_serv is not destroyed while in use by nfsd_open_local_fh, and warrants a more detailed explanation: nfsd_open_local_fh uses nfsd_serv_try_get before opening its file handle and then the reference must be dropped by the caller using nfsd_serv_put (via nfs_localio_ctx_free). Verified to fix an easy to hit crash that would occur if an nfsd instance running in a container, with a localio client mounted, is shutdown. Upon restart of the container and associated nfsd the client would go on to crash due to NULL pointer dereference that occurred due to the nfs client's localio attempting to nfsd_open_local_fh(), using nn->nfsd_serv, without having a proper reference on nn->nfsd_serv. Signed-off-by: Weston Andros Adamson <dros@xxxxxxxxxxxxxxx> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> Co-developed-by: Mike Snitzer <snitzer@xxxxxxxxxx> Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> Acked-by: Chuck Lever <chuck.lever@xxxxxxxxxx> Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> --- fs/Kconfig | 3 ++ fs/nfsd/Kconfig | 16 +++++++ fs/nfsd/Makefile | 1 + fs/nfsd/filecache.c | 2 +- fs/nfsd/localio.c | 114 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/trace.h | 3 +- fs/nfsd/vfs.h | 8 ++++ 7 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 fs/nfsd/localio.c diff --git a/fs/Kconfig b/fs/Kconfig index a46b0cbc4d8f..1b8a5edbddff 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -377,6 +377,9 @@ config NFS_ACL_SUPPORT tristate select FS_POSIX_ACL +config NFS_COMMON_LOCALIO_SUPPORT + bool + config NFS_COMMON bool depends on NFSD || NFS_FS || LOCKD diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index c0bd1509ccd4..e6fa7eaa1db0 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -90,6 +90,22 @@ config NFSD_V4 If unsure, say N. +config NFSD_LOCALIO + bool "NFS server support for the LOCALIO auxiliary protocol" + depends on NFSD + select NFS_COMMON_LOCALIO_SUPPORT + default n + help + Some NFS servers support an auxiliary NFS LOCALIO protocol + that is not an official part of the NFS protocol. + + This option enables support for the LOCALIO protocol in the + kernel's NFS server. Enable this to permit local NFS clients + to bypass the network when issuing reads and writes to the + local NFS server. + + If unsure, say N. + config NFSD_PNFS bool diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index b8736a82e57c..78b421778a79 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -23,3 +23,4 @@ nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o +nfsd-$(CONFIG_NFSD_LOCALIO) += localio.o diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a83d469bca6b..49f4aab3208a 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -53,7 +53,7 @@ #define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ -#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) +#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO) static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c new file mode 100644 index 000000000000..637402eecb61 --- /dev/null +++ b/fs/nfsd/localio.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NFS server support for local clients to bypass network stack + * + * Copyright (C) 2014 Weston Andros Adamson <dros@xxxxxxxxxxxxxxx> + * Copyright (C) 2019 Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> + * Copyright (C) 2024 Mike Snitzer <snitzer@xxxxxxxxxxxxxxx> + */ + +#include <linux/exportfs.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs_common.h> +#include <linux/nfslocalio.h> +#include <linux/string.h> + +#include "nfsd.h" +#include "vfs.h" +#include "netns.h" +#include "filecache.h" + +/** + * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file + * + * @cl_nfssvc_net: the 'struct net' to use to get the proper nfsd_net + * @cl_nfssvc_dom: the 'struct auth_domain' required for localio access + * @rpc_clnt: rpc_clnt that the client established, used for sockaddr and cred + * @cred: cred that the client established + * @nfs_fh: filehandle to lookup + * @fmode: fmode_t to use for open + * + * This function maps a local fh to a path on a local filesystem. + * This is useful when the nfs client has the local server mounted - it can + * avoid all the NFS overhead with reads, writes and commits. + * + * On successful return, returned nfs_localio_ctx will have its nfsd_file and + * nfsd_net members set. Caller is responsible for calling nfsd_file_put and + * nfsd_serv_put (via nfs_localio_ctx_free). + */ +struct nfs_localio_ctx * +nfsd_open_local_fh(nfs_uuid_t *uuid, + struct rpc_clnt *rpc_clnt, const struct cred *cred, + const struct nfs_fh *nfs_fh, const fmode_t fmode) +{ + int mayflags = NFSD_MAY_LOCALIO; + int status = 0; + struct nfsd_net *nn = NULL; + struct net *net; + struct svc_cred rq_cred; + struct svc_fh fh; + struct nfs_localio_ctx *localio; + __be32 beres; + + if (nfs_fh->size > NFS4_FHSIZE) + return ERR_PTR(-EINVAL); + + localio = nfs_localio_ctx_alloc(); + if (!localio) + return ERR_PTR(-ENOMEM); + + /* + * Not running in nfsd context, so must safely get reference on nfsd_serv. + * But the server may already be shutting down, if so disallow new localio. + * uuid->net is NOT a counted reference, but rcu_read_lock() ensures that if + * uuid->net is not NULL, then nfsd_serv_try_get() is safe and if that succeeds + * we will have an implied reference to the net. + */ + rcu_read_lock(); + net = READ_ONCE(uuid->net); + if (net) + nn = net_generic(net, nfsd_net_id); + if (unlikely(!nn || !nfsd_serv_try_get(nn))) { + rcu_read_unlock(); + status = -ENXIO; + goto out_nfsd_serv; + } + rcu_read_unlock(); + + /* nfs_fh -> svc_fh */ + fh_init(&fh, NFS4_FHSIZE); + fh.fh_handle.fh_size = nfs_fh->size; + memcpy(fh.fh_handle.fh_raw, nfs_fh->data, nfs_fh->size); + + if (fmode & FMODE_READ) + mayflags |= NFSD_MAY_READ; + if (fmode & FMODE_WRITE) + mayflags |= NFSD_MAY_WRITE; + + svcauth_map_clnt_to_svc_cred_local(rpc_clnt, cred, &rq_cred); + + beres = nfsd_file_acquire_local(uuid->net, &rq_cred, uuid->dom, + &fh, mayflags, &localio->nf); + if (beres) { + status = nfs_stat_to_errno(be32_to_cpu(beres)); + goto out_fh_put; + } + localio->nn = nn; + +out_fh_put: + fh_put(&fh); + if (rq_cred.cr_group_info) + put_group_info(rq_cred.cr_group_info); +out_nfsd_serv: + if (status) { + nfs_localio_ctx_free(localio); + return ERR_PTR(status); + } + return localio; +} +EXPORT_SYMBOL_GPL(nfsd_open_local_fh); + +/* Compile time type checking, not used by anything */ +static nfs_to_nfsd_open_local_fh_t __maybe_unused nfsd_open_local_fh_typecheck = nfsd_open_local_fh; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d22027e23761..82bcefcd1f21 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -86,7 +86,8 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ - { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) + { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }, \ + { NFSD_MAY_LOCALIO, "LOCALIO" }) TRACE_EVENT(nfsd_compound, TP_PROTO( diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 01947561d375..2ecceb8b9d3d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -8,6 +8,7 @@ #include <linux/fs.h> #include <linux/posix_acl.h> +#include <linux/nfslocalio.h> #include "nfsfh.h" #include "nfsd.h" @@ -33,6 +34,8 @@ #define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ +#define NFSD_MAY_LOCALIO 0x2000 /* for tracing, reflects when localio used */ + #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -158,6 +161,11 @@ __be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp, void nfsd_filp_close(struct file *fp); +struct nfs_localio_ctx * +nfsd_open_local_fh(nfs_uuid_t *, + struct rpc_clnt *, const struct cred *, + const struct nfs_fh *, const fmode_t); + static inline int fh_want_write(struct svc_fh *fh) { int ret; -- 2.44.0